[arrayfire] 29/284: Converted wrap & unwrap cpu fns to async calls

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit d0223f980047dfee315569eaf359105377e978b7
Author: pradeep <pradeep at arrayfire.com>
Date:   Fri Nov 20 15:48:10 2015 -0500

    Converted wrap & unwrap cpu fns to async calls
---
 src/backend/cpu/unwrap.cpp | 173 +++++++++++++++++++++++----------------------
 src/backend/cpu/wrap.cpp   | 171 ++++++++++++++++++++++----------------------
 2 files changed, 175 insertions(+), 169 deletions(-)

diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index f9c25f9..efb46be 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -13,112 +13,115 @@
 #include <err_cpu.hpp>
 #include <dispatch.hpp>
 #include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
-    template<typename T, int d>
-    void unwrap_dim(T *outPtr, const T *inPtr, const af::dim4 &odims, const af::dim4 &idims,
-                    const af::dim4 &ostrides, const af::dim4 &istrides,
-                    const dim_t wx, const dim_t wy, const dim_t sx, const dim_t sy,
-                    const dim_t px, const dim_t py)
-    {
-        dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
-
-        for(dim_t w = 0; w < odims[3]; w++) {
-            for(dim_t z = 0; z < odims[2]; z++) {
-
-                dim_t cOut = w * ostrides[3] + z * ostrides[2];
-                dim_t cIn  = w * istrides[3] + z * istrides[2];
-                const T* iptr = inPtr  + cIn;
-                T* optr_= outPtr + cOut;
-
-                for(dim_t col = 0; col < odims[d]; col++) {
-                    // Offset output ptr
-                    T* optr = optr_ + col * ostrides[d];
-
-                    // Calculate input window index
-                    dim_t winy = (col / nx);
-                    dim_t winx = (col % nx);
-
-                    dim_t startx = winx * sx;
-                    dim_t starty = winy * sy;
-
-                    dim_t spx = startx - px;
-                    dim_t spy = starty - py;
-
-                    // Short cut condition ensuring all values within input dimensions
-                    bool cond = (spx >= 0 && spx + wx < idims[0] && spy >= 0 && spy + wy < idims[1]);
-
-                    for(dim_t y = 0; y < wy; y++) {
-                        for(dim_t x = 0; x < wx; x++) {
-                            dim_t xpad = spx + x;
-                            dim_t ypad = spy + y;
-
-                            dim_t oloc = (y * wx + x);
-                            if (d == 0) oloc *= ostrides[1];
-
-                            if(cond || (xpad >= 0 && xpad < idims[0] && ypad >= 0 && ypad < idims[1])) {
-                                dim_t iloc = (ypad * istrides[1] + xpad * istrides[0]);
-                                optr[oloc] = iptr[iloc];
-                            } else {
-                                optr[oloc] = scalar<T>(0.0);
-                            }
+
+template<typename T, int d>
+void unwrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy,
+                const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
+{
+    const T *inPtr = in.get();
+    T *outPtr      = out.get();
+
+    af::dim4 idims    = in.dims();
+    af::dim4 odims    = out.dims();
+    af::dim4 istrides = in.strides();
+    af::dim4 ostrides = out.strides();
+
+    dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
+
+    for(dim_t w = 0; w < odims[3]; w++) {
+        for(dim_t z = 0; z < odims[2]; z++) {
+
+            dim_t cOut = w * ostrides[3] + z * ostrides[2];
+            dim_t cIn  = w * istrides[3] + z * istrides[2];
+            const T* iptr = inPtr  + cIn;
+            T* optr_= outPtr + cOut;
+
+            for(dim_t col = 0; col < odims[d]; col++) {
+                // Offset output ptr
+                T* optr = optr_ + col * ostrides[d];
+
+                // Calculate input window index
+                dim_t winy = (col / nx);
+                dim_t winx = (col % nx);
+
+                dim_t startx = winx * sx;
+                dim_t starty = winy * sy;
+
+                dim_t spx = startx - px;
+                dim_t spy = starty - py;
+
+                // Short cut condition ensuring all values within input dimensions
+                bool cond = (spx >= 0 && spx + wx < idims[0] && spy >= 0 && spy + wy < idims[1]);
+
+                for(dim_t y = 0; y < wy; y++) {
+                    for(dim_t x = 0; x < wx; x++) {
+                        dim_t xpad = spx + x;
+                        dim_t ypad = spy + y;
+
+                        dim_t oloc = (y * wx + x);
+                        if (d == 0) oloc *= ostrides[1];
+
+                        if(cond || (xpad >= 0 && xpad < idims[0] && ypad >= 0 && ypad < idims[1])) {
+                            dim_t iloc = (ypad * istrides[1] + xpad * istrides[0]);
+                            optr[oloc] = iptr[iloc];
+                        } else {
+                            optr[oloc] = scalar<T>(0.0);
                         }
                     }
                 }
             }
         }
     }
+}
 
-    template<typename T>
-    Array<T> unwrap(const Array<T> &in, const dim_t wx, const dim_t wy,
-                    const dim_t sx, const dim_t sy, const dim_t px, const dim_t py, const bool is_column)
-    {
-        af::dim4 idims = in.dims();
-
-        dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
-        dim_t ny = (idims[1] + 2 * py - wy) / sy + 1;
-
-        af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
+template<typename T>
+Array<T> unwrap(const Array<T> &in, const dim_t wx, const dim_t wy,
+                const dim_t sx, const dim_t sy, const dim_t px, const dim_t py, const bool is_column)
+{
+    af::dim4 idims = in.dims();
 
-        if (!is_column) {
-            std::swap(odims[0], odims[1]);
-        }
+    dim_t nx = (idims[0] + 2 * px - wx) / sx + 1;
+    dim_t ny = (idims[1] + 2 * py - wy) / sy + 1;
 
-        // Create output placeholder
-        Array<T> outArray = createEmptyArray<T>(odims);
+    af::dim4 odims(wx * wy, nx * ny, idims[2], idims[3]);
 
-        // Get pointers to raw data
-        const T *inPtr = in.get();
-        T *outPtr = outArray.get();
+    if (!is_column) {
+        std::swap(odims[0], odims[1]);
+    }
 
-        af::dim4 ostrides = outArray.strides();
-        af::dim4 istrides = in.strides();
+    Array<T> outArray = createEmptyArray<T>(odims);
 
-        if (is_column) {
-            unwrap_dim<T, 1>(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
-        } else {
-            unwrap_dim<T, 0>(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
-        }
-        return outArray;
+    if (is_column) {
+        getQueue().enqueue(unwrap_dim<T, 1>, outArray, in, wx, wy, sx, sy, px, py);
+    } else {
+        getQueue().enqueue(unwrap_dim<T, 0>, outArray, in, wx, wy, sx, sy, px, py);
     }
 
+    return outArray;
+}
+
 
 #define INSTANTIATE(T)                                                                  \
     template Array<T> unwrap<T> (const Array<T> &in, const dim_t wx, const dim_t wy,    \
                     const dim_t sx, const dim_t sy, const dim_t px, const dim_t py, const bool is_column);
 
 
-    INSTANTIATE(float)
-    INSTANTIATE(double)
-    INSTANTIATE(cfloat)
-    INSTANTIATE(cdouble)
-    INSTANTIATE(int)
-    INSTANTIATE(uint)
-    INSTANTIATE(intl)
-    INSTANTIATE(uintl)
-    INSTANTIATE(uchar)
-    INSTANTIATE(char)
-    INSTANTIATE(short)
-    INSTANTIATE(ushort)
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
+
 }
diff --git a/src/backend/cpu/wrap.cpp b/src/backend/cpu/wrap.cpp
index a04a6f5..3ff54de 100644
--- a/src/backend/cpu/wrap.cpp
+++ b/src/backend/cpu/wrap.cpp
@@ -13,92 +13,95 @@
 #include <err_cpu.hpp>
 #include <dispatch.hpp>
 #include <math.hpp>
+#include <platform.hpp>
+#include <async_queue.hpp>
 
 namespace cpu
 {
 
-    template<typename T, int d>
-    void wrap_dim(T *outPtr, const T *inPtr,
-                  const af::dim4 &odims, const af::dim4 &idims,
-                  const af::dim4 &ostrides, const af::dim4 &istrides,
-                  const dim_t wx, const dim_t wy,
-                  const dim_t sx, const dim_t sy,
-                  const dim_t px, const dim_t py)
-    {
-        dim_t nx = (odims[0] + 2 * px - wx) / sx + 1;
-
-        for(dim_t w = 0; w < idims[3]; w++) {
-            for(dim_t z = 0; z < idims[2]; z++) {
-
-                dim_t cIn  = w * istrides[3] + z * istrides[2];
-                dim_t cOut = w * ostrides[3] + z * ostrides[2];
-                const T* iptr_ = inPtr  + cIn;
-                T* optr= outPtr + cOut;
-
-                for(dim_t col = 0; col < idims[d]; col++) {
-                    // Offset output ptr
-                    const T* iptr = iptr_ + col * istrides[d];
-
-                    // Calculate input window index
-                    dim_t winy = (col / nx);
-                    dim_t winx = (col % nx);
-
-                    dim_t startx = winx * sx;
-                    dim_t starty = winy * sy;
-
-                    dim_t spx = startx - px;
-                    dim_t spy = starty - py;
-
-                    // Short cut condition ensuring all values within input dimensions
-                    bool cond = (spx >= 0 && spx + wx < odims[0] && spy >= 0 && spy + wy < odims[1]);
-
-                    for(dim_t y = 0; y < wy; y++) {
-                        for(dim_t x = 0; x < wx; x++) {
-                            dim_t xpad = spx + x;
-                            dim_t ypad = spy + y;
-
-                            dim_t iloc = (y * wx + x);
-                            if (d == 0) iloc *= istrides[1];
-
-                            if(cond || (xpad >= 0 && xpad < odims[0] && ypad >= 0 && ypad < odims[1])) {
-                                dim_t oloc = (ypad * ostrides[1] + xpad * ostrides[0]);
-                                // FIXME: When using threads, atomize this
-                                optr[oloc] += iptr[iloc];
-                            }
+template<typename T, int d>
+void wrap_dim(Array<T> out, const Array<T> in, const dim_t wx, const dim_t wy,
+              const dim_t sx, const dim_t sy, const dim_t px, const dim_t py)
+{
+    const T *inPtr = in.get();
+    T *outPtr      = out.get();
+
+    af::dim4 idims    = in.dims();
+    af::dim4 odims    = out.dims();
+    af::dim4 istrides = in.strides();
+    af::dim4 ostrides = out.strides();
+
+    dim_t nx = (odims[0] + 2 * px - wx) / sx + 1;
+
+    for(dim_t w = 0; w < idims[3]; w++) {
+        for(dim_t z = 0; z < idims[2]; z++) {
+
+            dim_t cIn  = w * istrides[3] + z * istrides[2];
+            dim_t cOut = w * ostrides[3] + z * ostrides[2];
+            const T* iptr_ = inPtr  + cIn;
+            T* optr= outPtr + cOut;
+
+            for(dim_t col = 0; col < idims[d]; col++) {
+                // Offset output ptr
+                const T* iptr = iptr_ + col * istrides[d];
+
+                // Calculate input window index
+                dim_t winy = (col / nx);
+                dim_t winx = (col % nx);
+
+                dim_t startx = winx * sx;
+                dim_t starty = winy * sy;
+
+                dim_t spx = startx - px;
+                dim_t spy = starty - py;
+
+                // Short cut condition ensuring all values within input dimensions
+                bool cond = (spx >= 0 && spx + wx < odims[0] && spy >= 0 && spy + wy < odims[1]);
+
+                for(dim_t y = 0; y < wy; y++) {
+                    for(dim_t x = 0; x < wx; x++) {
+                        dim_t xpad = spx + x;
+                        dim_t ypad = spy + y;
+
+                        dim_t iloc = (y * wx + x);
+                        if (d == 0) iloc *= istrides[1];
+
+                        if(cond || (xpad >= 0 && xpad < odims[0] && ypad >= 0 && ypad < odims[1])) {
+                            dim_t oloc = (ypad * ostrides[1] + xpad * ostrides[0]);
+                            // FIXME: When using threads, atomize this
+                            optr[oloc] += iptr[iloc];
                         }
                     }
                 }
             }
         }
     }
+}
 
-    template<typename T>
-    Array<T> wrap(const Array<T> &in,
-                  const dim_t ox, const dim_t oy,
-                  const dim_t wx, const dim_t wy,
-                  const dim_t sx, const dim_t sy,
-                  const dim_t px, const dim_t py,
-                  const bool is_column)
-    {
-        af::dim4 idims = in.dims();
-        af::dim4 odims(ox, oy, idims[2], idims[3]);
-        Array<T> out = createValueArray<T>(odims, scalar<T>(0));
-
-        const T *inPtr = in.get();
-        T *outPtr = out.get();
-
-        af::dim4 istrides = in.strides();
-        af::dim4 ostrides = out.strides();
-
-        if (is_column) {
-            wrap_dim<T, true >(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
-        } else {
-            wrap_dim<T, false>(outPtr, inPtr, odims, idims, ostrides, istrides, wx, wy, sx, sy, px, py);
-        }
+template<typename T>
+Array<T> wrap(const Array<T> &in,
+              const dim_t ox, const dim_t oy,
+              const dim_t wx, const dim_t wy,
+              const dim_t sx, const dim_t sy,
+              const dim_t px, const dim_t py,
+              const bool is_column)
+{
+    af::dim4 idims = in.dims();
+    af::dim4 odims(ox, oy, idims[2], idims[3]);
+
+    Array<T> out = createValueArray<T>(odims, scalar<T>(0));
+    out.eval();
+    in.eval();
 
-        return out;
+    if (is_column) {
+        getQueue().enqueue(wrap_dim<T, 1>, out, in, wx, wy, sx, sy, px, py);
+    } else {
+        getQueue().enqueue(wrap_dim<T, 0>, out, in, wx, wy, sx, sy, px, py);
     }
 
+    return out;
+}
+
 
 #define INSTANTIATE(T)                                          \
     template Array<T> wrap<T> (const Array<T> &in,              \
@@ -108,17 +111,17 @@ namespace cpu
                                const dim_t px, const dim_t py,  \
                                const bool is_column);
 
+INSTANTIATE(float)
+INSTANTIATE(double)
+INSTANTIATE(cfloat)
+INSTANTIATE(cdouble)
+INSTANTIATE(int)
+INSTANTIATE(uint)
+INSTANTIATE(intl)
+INSTANTIATE(uintl)
+INSTANTIATE(uchar)
+INSTANTIATE(char)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 
-    INSTANTIATE(float)
-    INSTANTIATE(double)
-    INSTANTIATE(cfloat)
-    INSTANTIATE(cdouble)
-    INSTANTIATE(int)
-    INSTANTIATE(uint)
-    INSTANTIATE(intl)
-    INSTANTIATE(uintl)
-    INSTANTIATE(uchar)
-    INSTANTIATE(char)
-    INSTANTIATE(short)
-    INSTANTIATE(ushort)
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list