[arrayfire] 09/284: Async CPU Copy, Assign, and Index

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:13 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 96c5602965334c5f36f33dc69ad81314fd6e6bd7
Author: Umar Arshad <umar at arrayfire.com>
Date:   Thu Aug 13 17:25:31 2015 -0400

    Async CPU Copy, Assign, and Index
---
 src/backend/cpu/assign.cpp |  9 ++++++--
 src/backend/cpu/copy.cpp   |  6 ++---
 src/backend/cpu/index.cpp  | 56 ++++++++++++++++++++++++++--------------------
 3 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/src/backend/cpu/assign.cpp b/src/backend/cpu/assign.cpp
index c0a177f..589fa53 100644
--- a/src/backend/cpu/assign.cpp
+++ b/src/backend/cpu/assign.cpp
@@ -16,9 +16,12 @@
 #include <err_cpu.hpp>
 #include <platform.hpp>
 #include <async_queue.hpp>
+#include <array>
 
 using af::dim4;
 using std::ref;
+using std::copy;
+using std::array;
 
 namespace cpu
 {
@@ -37,7 +40,7 @@ dim_t trimIndex(int idx, const dim_t &len)
 }
 
 template<typename T>
-void assign_(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
+void assign_(Array<T> out, const array<af_index_t, 4> idxrs, const Array<T> rhs)
 {
     bool isSeq[4];
     std::vector<af_seq> seqs(4, af_span);
@@ -117,7 +120,9 @@ void assign_(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
 template<typename T>
 void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs)
 {
-    getQueue().enqueue(assign_<T>, ref(out), idxrs, ref(rhs));
+    array<af_index_t, 4> idx;
+    copy(idxrs, idxrs+4, begin(idx));
+    getQueue().enqueue(assign_<T>, out, move(idx), rhs);
 }
 
 #define INSTANTIATE(T) \
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index 35c1ebe..433e718 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -117,7 +117,7 @@ namespace cpu
     template<typename T>
     void multiply_inplace(Array<T> &in, double val)
     {
-        copy<T, T>(in, in, 0, val);
+        getQueue().enqueue(copy<T, T>,in, in, 0, val);
     }
 
     template<typename inType, typename outType>
@@ -126,14 +126,14 @@ namespace cpu
              outType default_value, double factor)
     {
         Array<outType> ret = createValueArray<outType>(dims, default_value);
-        copy<inType, outType>(ret, in, outType(default_value), factor);
+        getQueue().enqueue(copy<inType, outType>,ret, in, outType(default_value), factor);
         return ret;
     }
 
     template<typename inType, typename outType>
     void copyArray(Array<outType> &out, Array<inType> const &in)
     {
-        copy<inType, outType>(out, in, scalar<outType>(0), 1.0);
+        getQueue().enqueue(copy<inType, outType>,out, in, scalar<outType>(0), 1.0);
     }
 
 
diff --git a/src/backend/cpu/index.cpp b/src/backend/cpu/index.cpp
index 162e67f..c6112fa 100644
--- a/src/backend/cpu/index.cpp
+++ b/src/backend/cpu/index.cpp
@@ -15,6 +15,8 @@
 #include <handle.hpp>
 #include <err_cpu.hpp>
 #include <vector>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 using af::dim4;
 
@@ -68,43 +70,49 @@ Array<T> index(const Array<T>& in, const af_index_t idxrs[])
     Array<T> out = createEmptyArray<T>(oDims);
     dim4 oStrides= out.strides();
 
-    const T *src = in.get();
-    T *dst = out.get();
 
-    const uint* ptr0 = idxArrs[0].get();
-    const uint* ptr1 = idxArrs[1].get();
-    const uint* ptr2 = idxArrs[2].get();
-    const uint* ptr3 = idxArrs[3].get();
+    auto func = [=] (Array<T> out, const Array<T> in) {
 
-    for (dim_t l=0; l<oDims[3]; ++l) {
+        const T *src = in.get();
+              T *dst = out.get();
 
-        dim_t lOff   = l*oStrides[3];
-        dim_t inIdx3 = trimIndex(isSeq[3] ? l+iOffs[3] : ptr3[l], iDims[3]);
-        dim_t inOff3 = inIdx3*iStrds[3];
+        const uint* ptr0 = idxArrs[0].get();
+        const uint* ptr1 = idxArrs[1].get();
+        const uint* ptr2 = idxArrs[2].get();
+        const uint* ptr3 = idxArrs[3].get();
 
-        for (dim_t k=0; k<oDims[2]; ++k) {
+        for (dim_t l=0; l<oDims[3]; ++l) {
 
-            dim_t kOff   = k*oStrides[2];
-            dim_t inIdx2 = trimIndex(isSeq[2] ? k+iOffs[2] : ptr2[k], iDims[2]);
-            dim_t inOff2 = inIdx2*iStrds[2];
+            dim_t lOff   = l*oStrides[3];
+            dim_t inIdx3 = trimIndex(isSeq[3] ? l+iOffs[3] : ptr3[l], iDims[3]);
+            dim_t inOff3 = inIdx3*iStrds[3];
 
-            for (dim_t j=0; j<oDims[1]; ++j) {
+            for (dim_t k=0; k<oDims[2]; ++k) {
 
-                dim_t jOff   = j*oStrides[1];
-                dim_t inIdx1 = trimIndex(isSeq[1] ? j+iOffs[1] : ptr1[j], iDims[1]);
-                dim_t inOff1 = inIdx1*iStrds[1];
+                dim_t kOff   = k*oStrides[2];
+                dim_t inIdx2 = trimIndex(isSeq[2] ? k+iOffs[2] : ptr2[k], iDims[2]);
+                dim_t inOff2 = inIdx2*iStrds[2];
 
-                for (dim_t i=0; i<oDims[0]; ++i) {
+                for (dim_t j=0; j<oDims[1]; ++j) {
 
-                    dim_t iOff   = i*oStrides[0];
-                    dim_t inIdx0 = trimIndex(isSeq[0] ? i+iOffs[0] : ptr0[i], iDims[0]);
-                    dim_t inOff0 = inIdx0*iStrds[0];
+                    dim_t jOff   = j*oStrides[1];
+                    dim_t inIdx1 = trimIndex(isSeq[1] ? j+iOffs[1] : ptr1[j], iDims[1]);
+                    dim_t inOff1 = inIdx1*iStrds[1];
 
-                    dst[lOff+kOff+jOff+iOff] = src[inOff3+inOff2+inOff1+inOff0];
+                    for (dim_t i=0; i<oDims[0]; ++i) {
+
+                        dim_t iOff   = i*oStrides[0];
+                        dim_t inIdx0 = trimIndex(isSeq[0] ? i+iOffs[0] : ptr0[i], iDims[0]);
+                        dim_t inOff0 = inIdx0*iStrds[0];
+
+                        dst[lOff+kOff+jOff+iOff] = src[inOff3+inOff2+inOff1+inOff0];
+                    }
                 }
             }
         }
-    }
+    };
+
+    getQueue().enqueue(func, out, in);
 
     return out;
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list