[arrayfire] 06/284: Async CPU Convolve

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:13 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 759b506fed2406bac094ec26b4fad293cd09f0e9
Author: Umar Arshad <umar at arrayfire.com>
Date:   Mon Aug 10 22:08:55 2015 -0400

    Async CPU Convolve
---
 src/backend/cpu/convolve.cpp | 47 +++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/src/backend/cpu/convolve.cpp b/src/backend/cpu/convolve.cpp
index 33670d4..a5d7ded 100644
--- a/src/backend/cpu/convolve.cpp
+++ b/src/backend/cpu/convolve.cpp
@@ -14,6 +14,8 @@
 #include <convolve.hpp>
 #include <err_cpu.hpp>
 #include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 using af::dim4;
 
@@ -204,8 +206,8 @@ Array<T> convolve(Array<T> const& signal, Array<accT> const& filter, ConvolveBat
 
     Array<T> out = createEmptyArray<T>(oDims);
 
-    convolve_nd<T, accT, baseDim, expand>(out.get(), signal.get(), filter.get(),
-            oDims, sDims, fDims, out.strides(), sStrides, filter.strides(), kind);
+    getQueue().enqueue(convolve_nd<T, accT, baseDim, expand>,out.get(), signal.get(), filter.get(),
+                                              oDims, sDims, fDims, out.strides(), sStrides, filter.strides(), kind);
 
     return out;
 }
@@ -271,32 +273,37 @@ Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<ac
         oDims[1] += rflen - 1;
     }
 
-    Array<T> temp = createEmptyArray<T>(tDims);
     Array<T> out  = createEmptyArray<T>(oDims);
-    auto tStrides = temp.strides();
-    auto oStrides = out.strides();
 
-    for (dim_t b3=0; b3<oDims[3]; ++b3) {
+    auto func = [=] (Array<T> out) {
+        Array<T> temp = createEmptyArray<T>(tDims);
+        auto tStrides = temp.strides();
+        auto oStrides = out.strides();
 
-        dim_t i_b3Off = b3*sStrides[3];
-        dim_t t_b3Off = b3*tStrides[3];
-        dim_t o_b3Off = b3*oStrides[3];
+        for (dim_t b3=0; b3<oDims[3]; ++b3) {
 
-        for (dim_t b2=0; b2<oDims[2]; ++b2) {
+            dim_t i_b3Off = b3*sStrides[3];
+            dim_t t_b3Off = b3*tStrides[3];
+            dim_t o_b3Off = b3*oStrides[3];
 
-            T const *iptr = signal.get()+ b2*sStrides[2] + i_b3Off;
-            T *tptr = temp.get() + b2*tStrides[2] + t_b3Off;
-            T *optr = out.get()  + b2*oStrides[2] + o_b3Off;
+            for (dim_t b2=0; b2<oDims[2]; ++b2) {
 
-            convolve2_separable<T, accT, 0, expand>(tptr, iptr, c_filter.get(),
-                    tDims, sDims, sDims, cflen,
-                    tStrides, sStrides, c_filter.strides()[0]);
+                T const *iptr = signal.get()+ b2*sStrides[2] + i_b3Off;
+                T *tptr = temp.get() + b2*tStrides[2] + t_b3Off;
+                T *optr = out.get()  + b2*oStrides[2] + o_b3Off;
 
-            convolve2_separable<T, accT, 1, expand>(optr, tptr, r_filter.get(),
-                    oDims, tDims, sDims, rflen,
-                    oStrides, tStrides, r_filter.strides()[0]);
+                convolve2_separable<T, accT, 0, expand>(tptr, iptr, c_filter.get(),
+                        tDims, sDims, sDims, cflen,
+                        tStrides, sStrides, c_filter.strides()[0]);
+
+                convolve2_separable<T, accT, 1, expand>(optr, tptr, r_filter.get(),
+                        oDims, tDims, sDims, rflen,
+                        oStrides, tStrides, r_filter.strides()[0]);
+            }
         }
-    }
+    };
+
+    getQueue().enqueue(func, out);
 
     return out;
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list