[arrayfire] 15/284: Async CPU reduce and ireduce

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:14 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 1a0802fb2fe22930e81613faa340038b68e0e2e2
Author: Umar Arshad <umar at arrayfire.com>
Date:   Tue Sep 22 13:05:17 2015 -0400

    Async CPU reduce and ireduce
---
 src/backend/cpu/ireduce.cpp | 42 +++++++++++++++++-------------------------
 src/backend/cpu/reduce.cpp  | 15 ++++++++++-----
 2 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/backend/cpu/ireduce.cpp b/src/backend/cpu/ireduce.cpp
index 199a0be..d3a76d9 100644
--- a/src/backend/cpu/ireduce.cpp
+++ b/src/backend/cpu/ireduce.cpp
@@ -14,6 +14,9 @@
 #include <Array.hpp>
 #include <ireduce.hpp>
 
+#include <platform.hpp>
+#include <async_queue.hpp>
+
 using af::dim4;
 
 namespace cpu
@@ -106,42 +109,31 @@ namespace cpu
     };
 
     template<af_op_t op, typename T>
+    using ireduce_dim_func = std::function<void(T *out, const dim4 ostrides, const dim4 odims,
+                                                uint *loc,
+                                                const T *in , const dim4 istrides, const dim4 idims,
+                                                const int dim)>;
+
+    template<af_op_t op, typename T>
     void ireduce(Array<T> &out, Array<uint> &loc,
                  const Array<T> &in, const int dim)
     {
         dim4 odims = in.dims();
         odims[dim] = 1;
+        static const ireduce_dim_func<op, T> ireduce_funcs[] = { ireduce_dim<op, T, 1>()
+                                                               , ireduce_dim<op, T, 2>()
+                                                               , ireduce_dim<op, T, 3>()
+                                                               , ireduce_dim<op, T, 4>()};
 
-        switch (in.ndims()) {
-        case 1:
-            ireduce_dim<op, T, 1>()(out.get(), out.strides(), out.dims(),
-                                    loc.get(),
-                                    in.get(), in.strides(), in.dims(), dim);
-            break;
-
-        case 2:
-            ireduce_dim<op, T, 2>()(out.get(), out.strides(), out.dims(),
-                                    loc.get(),
-                                    in.get(), in.strides(), in.dims(), dim);
-            break;
-
-        case 3:
-            ireduce_dim<op, T, 3>()(out.get(), out.strides(), out.dims(),
-                                    loc.get(),
-                                    in.get(), in.strides(), in.dims(), dim);
-            break;
-
-        case 4:
-            ireduce_dim<op, T, 4>()(out.get(), out.strides(), out.dims(),
-                                    loc.get(),
-                                    in.get(), in.strides(), in.dims(), dim);
-            break;
-        }
+        getQueue().enqueue(ireduce_funcs[in.ndims() - 1], out.get(), out.strides(), out.dims(),
+                           loc.get(), in.get(), in.strides(), in.dims(), dim);
     }
 
     template<af_op_t op, typename T>
     T ireduce_all(unsigned *loc, const Array<T> &in)
     {
+        evalArray(in);
+        getQueue().sync();
         af::dim4 dims = in.dims();
         af::dim4 strides = in.strides();
         const T *inPtr = in.get();
diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index 5724508..8ce7d0d 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -16,6 +16,9 @@
 #include <functional>
 #include <complex>
 
+#include <platform.hpp>
+#include <async_queue.hpp>
+
 using af::dim4;
 
 namespace cpu
@@ -74,12 +77,12 @@ namespace cpu
         odims[dim] = 1;
 
         Array<To> out = createEmptyArray<To>(odims);
-        static reduce_dim_func<op, Ti, To>  reduce_funcs[4] = { reduce_dim<op, Ti, To, 1>()
-                                                              , reduce_dim<op, Ti, To, 2>()
-                                                              , reduce_dim<op, Ti, To, 3>()
-                                                              , reduce_dim<op, Ti, To, 4>()};
+        static const reduce_dim_func<op, Ti, To>  reduce_funcs[4] = { reduce_dim<op, Ti, To, 1>()
+                                                                    , reduce_dim<op, Ti, To, 2>()
+                                                                    , reduce_dim<op, Ti, To, 3>()
+                                                                    , reduce_dim<op, Ti, To, 4>()};
 
-        reduce_funcs[in.ndims() - 1](out.get(), out.strides(), out.dims(),
+        getQueue().enqueue(reduce_funcs[in.ndims() - 1],out.get(), out.strides(), out.dims(),
                                      in.get(), in.strides(), in.dims(), dim,
                                      change_nan, nanval);
 
@@ -89,6 +92,8 @@ namespace cpu
     template<af_op_t op, typename Ti, typename To>
     To reduce_all(const Array<Ti> &in, bool change_nan, double nanval)
     {
+        evalArray(in);
+        getQueue().sync();
         Transform<Ti, To, op> transform;
         Binary<To, op> reduce;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list