[arrayfire] 43/284: Converted cpu scan function to async call

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:17 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 9510fcb1e2554078070c28ea88c2e5078353f72d
Author: pradeep <pradeep at arrayfire.com>
Date:   Wed Dec 2 10:44:06 2015 -0500

    Converted cpu scan function to async call
    
    Added `.eval()` calls on the input Array objects inside the following
    functions to ensure that the inputs have been computed before `.get()`
    is called on them to read their data values.
    
    * reduce
    * setUnique
    * setIntersection
    * setUnion
---
 src/backend/cpu/reduce.cpp |   1 +
 src/backend/cpu/scan.cpp   | 154 ++++++++++++++++++++++++---------------------
 src/backend/cpu/set.cpp    |  13 ++++
 3 files changed, 96 insertions(+), 72 deletions(-)

diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index ffe9185..e01f0c5 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -89,6 +89,7 @@ namespace cpu
     {
         dim4 odims = in.dims();
         odims[dim] = 1;
+        in.eval();
 
         Array<To> out = createEmptyArray<To>(odims);
         static const reduce_dim_func<op, Ti, To>  reduce_funcs[4] = { reduce_dim<op, Ti, To, 1>()
diff --git a/src/backend/cpu/scan.cpp b/src/backend/cpu/scan.cpp
index 2bdda21..39157ca 100644
--- a/src/backend/cpu/scan.cpp
+++ b/src/backend/cpu/scan.cpp
@@ -14,102 +14,112 @@
 #include <Array.hpp>
 #include <scan.hpp>
 #include <ops.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 using af::dim4;
 
 namespace cpu
 {
-    template<af_op_t op, typename Ti, typename To, int D>
-    struct scan_dim
-    {
-        void operator()(To *out, const dim4 ostrides, const dim4 odims,
-                        const Ti *in , const dim4 istrides, const dim4 idims,
-                        const int dim)
-        {
-            const int D1 = D - 1;
-            for (dim_t i = 0; i < odims[D1]; i++) {
-                scan_dim<op, Ti, To, D1>()(out + i * ostrides[D1],
-                                           ostrides, odims,
-                                           in  + i * istrides[D1],
-                                           istrides, idims,
-                                           dim);
-                if (D1 == dim) break;
-            }
-        }
-    };
 
-    template<af_op_t op, typename Ti, typename To>
-    struct scan_dim<op, Ti, To, 0>
+template<af_op_t op, typename Ti, typename To, int D>
+struct scan_dim
+{
+    void operator()(Array<To> out, dim_t outOffset,
+                    const Array<Ti> in, dim_t inOffset,
+                    const int dim) const
     {
-        void operator()(To *out, const dim4 ostrides, const dim4 odims,
-                        const Ti *in , const dim4 istrides, const dim4 idims,
-                        const int dim)
-        {
-
-            dim_t istride = istrides[dim];
-            dim_t ostride = ostrides[dim];
-
-            Transform<Ti, To, op> transform;
-            // FIXME: Change the name to something better
-            Binary<To, op> scan;
-
-            To out_val = scan.init();
-            for (dim_t i = 0; i < idims[dim]; i++) {
-                To in_val = transform(in[i * istride]);
-                out_val = scan(in_val, out_val);
-                out[i * ostride] = out_val;
-            }
+        const dim4 odims    = out.dims();
+        const dim4 ostrides = out.strides();
+        const dim4 istrides = in.strides();
+
+        const int D1 = D - 1;
+        for (dim_t i = 0; i < odims[D1]; i++) {
+            scan_dim<op, Ti, To, D1> func;
+            getQueue().enqueue(func,
+                    out, outOffset + i * ostrides[D1],
+                    in, inOffset + i * istrides[D1], dim);
+            if (D1 == dim) break;
         }
-    };
+    }
+};
 
-    template<af_op_t op, typename Ti, typename To>
-    Array<To> scan(const Array<Ti>& in, const int dim)
+template<af_op_t op, typename Ti, typename To>
+struct scan_dim<op, Ti, To, 0>
+{
+    void operator()(Array<To> output, dim_t outOffset,
+                    const Array<Ti> input,  dim_t inOffset,
+                    const int dim) const
     {
-        dim4 dims = in.dims();
+        const Ti* in = input.get() + inOffset;
+              To* out= output.get()+ outOffset;
 
-        Array<To> out = createValueArray<To>(dims, 0);
+        const dim4 ostrides = output.strides();
+        const dim4 istrides = input.strides();
+        const dim4 idims    = input.dims();
+
+        dim_t istride = istrides[dim];
+        dim_t ostride = ostrides[dim];
+
+        Transform<Ti, To, op> transform;
+        // FIXME: Change the name to something better
+        Binary<To, op> scan;
+
+        To out_val = scan.init();
+        for (dim_t i = 0; i < idims[dim]; i++) {
+            To in_val = transform(in[i * istride]);
+            out_val = scan(in_val, out_val);
+            out[i * ostride] = out_val;
+        }
+    }
+};
 
-        switch (in.ndims()) {
+template<af_op_t op, typename Ti, typename To>
+Array<To> scan(const Array<Ti>& in, const int dim)
+{
+    dim4 dims     = in.dims();
+    Array<To> out = createValueArray<To>(dims, 0);
+    out.eval();
+    in.eval();
+
+    switch (in.ndims()) {
         case 1:
-            scan_dim<op, Ti, To, 1>()(out.get(), out.strides(), out.dims(),
-                                      in.get(), in.strides(), in.dims(), dim);
+            scan_dim<op, Ti, To, 1> func1;
+            getQueue().enqueue(func1, out, 0, in, 0, dim);
             break;
-
         case 2:
-            scan_dim<op, Ti, To, 2>()(out.get(), out.strides(), out.dims(),
-                                      in.get(), in.strides(), in.dims(), dim);
+            scan_dim<op, Ti, To, 2> func2;
+            getQueue().enqueue(func2, out, 0, in, 0, dim);
             break;
-
         case 3:
-            scan_dim<op, Ti, To, 3>()(out.get(), out.strides(), out.dims(),
-                                      in.get(), in.strides(), in.dims(), dim);
+            scan_dim<op, Ti, To, 3> func3;
+            getQueue().enqueue(func3, out, 0, in, 0, dim);
             break;
-
         case 4:
-            scan_dim<op, Ti, To, 4>()(out.get(), out.strides(), out.dims(),
-                                      in.get(), in.strides(), in.dims(), dim);
+            scan_dim<op, Ti, To, 4> func4;
+            getQueue().enqueue(func4, out, 0, in, 0, dim);
             break;
-        }
-
-        return out;
     }
 
+    return out;
+}
+
 #define INSTANTIATE(ROp, Ti, To)                                        \
     template Array<To> scan<ROp, Ti, To>(const Array<Ti> &in, const int dim); \
 
-    //accum
-    INSTANTIATE(af_add_t, float  , float  )
-    INSTANTIATE(af_add_t, double , double )
-    INSTANTIATE(af_add_t, cfloat , cfloat )
-    INSTANTIATE(af_add_t, cdouble, cdouble)
-    INSTANTIATE(af_add_t, int    , int    )
-    INSTANTIATE(af_add_t, uint   , uint   )
-    INSTANTIATE(af_add_t, intl   , intl   )
-    INSTANTIATE(af_add_t, uintl  , uintl  )
-    INSTANTIATE(af_add_t, char   , int    )
-    INSTANTIATE(af_add_t, uchar  , uint   )
-    INSTANTIATE(af_add_t, short  , int    )
-    INSTANTIATE(af_add_t, ushort , uint   )
-    INSTANTIATE(af_notzero_t, char  , uint   )
+//accum
+INSTANTIATE(af_add_t, float  , float  )
+INSTANTIATE(af_add_t, double , double )
+INSTANTIATE(af_add_t, cfloat , cfloat )
+INSTANTIATE(af_add_t, cdouble, cdouble)
+INSTANTIATE(af_add_t, int    , int    )
+INSTANTIATE(af_add_t, uint   , uint   )
+INSTANTIATE(af_add_t, intl   , intl   )
+INSTANTIATE(af_add_t, uintl  , uintl  )
+INSTANTIATE(af_add_t, char   , int    )
+INSTANTIATE(af_add_t, uchar  , uint   )
+INSTANTIATE(af_add_t, short  , int    )
+INSTANTIATE(af_add_t, ushort , uint   )
+INSTANTIATE(af_notzero_t, char  , uint)
 
 }
diff --git a/src/backend/cpu/set.cpp b/src/backend/cpu/set.cpp
index 3215e6d..d9ca084 100644
--- a/src/backend/cpu/set.cpp
+++ b/src/backend/cpu/set.cpp
@@ -18,6 +18,8 @@
 #include <sort.hpp>
 #include <err_cpu.hpp>
 #include <vector>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
@@ -28,6 +30,9 @@ namespace cpu
     Array<T> setUnique(const Array<T> &in,
                         const bool is_sorted)
     {
+        in.eval();
+        getQueue().sync();
+
         Array<T> out = createEmptyArray<T>(af::dim4());
         if (is_sorted) out = copyArray<T>(in);
         else           out = sort<T, 1>(in, 0);
@@ -46,6 +51,10 @@ namespace cpu
                        const Array<T> &second,
                        const bool is_unique)
     {
+        first.eval();
+        second.eval();
+        getQueue().sync();
+
         Array<T> uFirst = first;
         Array<T> uSecond = second;
 
@@ -78,6 +87,10 @@ namespace cpu
                           const Array<T> &second,
                           const bool is_unique)
     {
+        first.eval();
+        second.eval();
+        getQueue().sync();
+
         Array<T> uFirst = first;
         Array<T> uSecond = second;
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list