[arrayfire] 171/284: Add OpenCL-CPU fallback for LU

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:30 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit 45abbc35741f5e04a6c9655b30bd5dc3f7b47b46
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Fri Jan 8 11:02:11 2016 -0500

    Add OpenCL-CPU fallback for LU
---
 src/backend/opencl/cpu/cpu_lapack_helper.hpp |  35 ++++++
 src/backend/opencl/cpu/cpu_lu.cpp            | 178 +++++++++++++++++++++++++++
 src/backend/opencl/cpu/cpu_lu.hpp            |  22 ++++
 src/backend/opencl/lu.cpp                    |  11 +-
 4 files changed, 245 insertions(+), 1 deletion(-)

diff --git a/src/backend/opencl/cpu/cpu_lapack_helper.hpp b/src/backend/opencl/cpu/cpu_lapack_helper.hpp
new file mode 100644
index 0000000..174022e
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_lapack_helper.hpp
@@ -0,0 +1,35 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#ifndef AFCPU_LAPACK  // include guard for this LAPACK helper header
+#define AFCPU_LAPACK
+
+#include <types.hpp>
+
+#define lapack_complex_float opencl::cfloat    // LAPACKE complex type names are aliased to the backend's complex types,
+#define lapack_complex_double opencl::cdouble  // and this is done ahead of the LAPACKE includes further down
+#define LAPACK_PREFIX LAPACKE_
+#define ORDER_TYPE int                         // type of the leading argument of getrf_func_def in cpu_lu.cpp
+#define AF_LAPACK_COL_MAJOR LAPACK_COL_MAJOR   // layout value passed to getrf; replaced in the Apple branch below
+#define LAPACK_NAME(fn) LAPACKE_##fn           // token-pastes the prefix: LAPACK_NAME(sgetrf) -> LAPACKE_sgetrf
+
+#ifdef __APPLE__  // Apple builds: Accelerate plus a lapacke.hpp header
+#include <Accelerate/Accelerate.h>
+#include <lapacke.hpp>
+#undef AF_LAPACK_COL_MAJOR
+#define AF_LAPACK_COL_MAJOR 0  // NOTE(review): why 0 is used on Apple is not visible here — presumably lapacke.hpp ignores the layout argument; confirm
+#else
+#ifdef USE_MKL  // non-Apple builds: MKL's LAPACKE header when USE_MKL is defined, otherwise lapacke.h
+#include<mkl_lapacke.h>
+#else // NETLIB LAPACKE
+#include<lapacke.h>
+#endif
+#endif
+
+#endif
diff --git a/src/backend/opencl/cpu/cpu_lu.cpp b/src/backend/opencl/cpu/cpu_lu.cpp
new file mode 100644
index 0000000..f415cb3
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_lu.cpp
@@ -0,0 +1,178 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <cpu/cpu_lapack_helper.hpp>
+#include <cpu/cpu_lu.hpp>
+#include <err_common.hpp>
+
+#include <af/dim4.hpp>
+#include <handle.hpp>
+#include <iostream>
+#include <cassert>
+
+#include <range.hpp>
+
+namespace opencl
+{
+namespace cpu
+{
+
+template<typename T>  // Function-pointer type for a getrf routine operating on element type T.
+using getrf_func_def = int (*)(ORDER_TYPE, int, int,  // lu_inplace() calls it as (AF_LAPACK_COL_MAJOR, M, N,
+                               T*, int,               //   mapped matrix data, stride along dim 1,
+                               int*);                 //   mapped pivot buffer)
+
+#define LU_FUNC_DEF( FUNC )                                     \
+template<typename T> FUNC##_func_def<T> FUNC##_func();  // declares the primary template FUNC_func<T>(); bodies come only from LU_FUNC
+
+
+#define LU_FUNC( FUNC, TYPE, PREFIX )                           \
+template<> FUNC##_func_def<TYPE>     FUNC##_func<TYPE>()        \
+{ return & LAPACK_NAME(PREFIX##FUNC); }  // specialization for TYPE: returns the address of LAPACK_NAME(<PREFIX><FUNC>)
+
+LU_FUNC_DEF( getrf )          // declares getrf_func<T>()
+LU_FUNC(getrf , float  , s)   // getrf_func<float>()   returns &LAPACK_NAME(sgetrf)
+LU_FUNC(getrf , double , d)   // getrf_func<double>()  returns &LAPACK_NAME(dgetrf)
+LU_FUNC(getrf , cfloat , c)   // getrf_func<cfloat>()  returns &LAPACK_NAME(cgetrf)
+LU_FUNC(getrf , cdouble, z)   // getrf_func<cdouble>() returns &LAPACK_NAME(zgetrf)
+
+template<typename T>  // Splits the combined factorization stored in `in` into the separate `lower` and `upper` arrays.
+void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)  // lower/upper must already be allocated by the caller
+{
+    T *l = getMappedPtr<T>(lower.get());  // host pointers used for direct element access below;
+    T *u = getMappedPtr<T>(upper.get());  // getMappedPtr presumably maps the underlying OpenCL buffer — confirm
+    T *i = getMappedPtr<T>(in.get());
+
+    dim4 ldm = lower.dims();
+    dim4 udm = upper.dims();
+    dim4 idm = in.dims();
+
+    dim4 lst = lower.strides();  // strides are used as element offsets into the mapped pointers
+    dim4 ust = upper.strides();
+    dim4 ist = in.strides();
+
+    for(dim_t ow = 0; ow < idm[3]; ow++) {  // all four loops walk the extents of `in`; lower/upper writes are guarded below
+        const dim_t lW = ow * lst[3];
+        const dim_t uW = ow * ust[3];
+        const dim_t iW = ow * ist[3];
+
+        for(dim_t oz = 0; oz < idm[2]; oz++) {
+            const dim_t lZW = lW + oz * lst[2];
+            const dim_t uZW = uW + oz * ust[2];
+            const dim_t iZW = iW + oz * ist[2];
+
+            for(dim_t oy = 0; oy < idm[1]; oy++) {  // oy: index along dim 1
+                const dim_t lYZW = lZW + oy * lst[1];
+                const dim_t uYZW = uZW + oy * ust[1];
+                const dim_t iYZW = iZW + oy * ist[1];
+
+                for(dim_t ox = 0; ox < idm[0]; ox++) {  // ox: index along dim 0
+                    const dim_t lMem = lYZW + ox;  // ox is added unscaled, i.e. unit stride along dim 0 is assumed
+                    const dim_t uMem = uYZW + ox;
+                    const dim_t iMem = iYZW + ox;
+                    if(ox > oy) {  // dim-0 index beyond dim-1 index: strictly below the diagonal
+                        if(oy < ldm[1])  // write only while oy lies inside lower's dim-1 extent
+                            l[lMem] = i[iMem];  // lower takes the stored value
+                        if(ox < udm[0])  // write only while ox lies inside upper's dim-0 extent
+                            u[uMem] = scalar<T>(0);  // upper is zero below the diagonal
+                    } else if (oy > ox) {  // strictly above the diagonal
+                        if(oy < ldm[1])
+                            l[lMem] = scalar<T>(0);  // lower is zero above the diagonal
+                        if(ox < udm[0])
+                            u[uMem] = i[iMem];  // upper takes the stored value
+                    } else if(ox == oy) {  // on the diagonal (the only remaining case)
+                        if(oy < ldm[1])
+                            l[lMem] = scalar<T>(1.0);  // lower gets a unit diagonal
+                        if(ox < udm[0])
+                            u[uMem] = i[iMem];  // upper keeps the stored diagonal value
+                    }
+                }
+            }
+        }
+    }
+
+    unmapPtr(lower.get(), l);  // hand back the three pointers obtained at the top
+    unmapPtr(upper.get(), u);
+    unmapPtr(in.get(), i);
+}
+
+void convertPivot(Array<int> &pivot, int out_sz)  // Replaces `pivot` (1-based swap targets) with an index array of length out_sz with those swaps applied.
+{
+    Array<int> p = range<int>(dim4(out_sz), 0); // Runs opencl; presumably fills p with 0..out_sz-1 along dim 0 — confirm
+
+    int *d_pi = getMappedPtr<int>(pivot.get());  // incoming pivot entries (read only here)
+    int *d_po = getMappedPtr<int>(p.get());      // index array that is permuted in place
+
+    dim_t d0 = pivot.dims()[0];  // number of pivot entries to replay
+
+    for(int j = 0; j < (int)d0; j++) {  // swaps are applied in order, entry by entry
+        // 1 indexed in pivot, hence the -1 when used as an index into d_po
+        std::swap(d_po[j], d_po[d_pi[j] - 1]);
+    }
+
+    unmapPtr(pivot.get(), d_pi);
+    unmapPtr(p.get(), d_po);
+
+    pivot = p;  // caller's pivot now refers to the converted array
+}
+
+template<typename T>  // LU entry point of the cpu namespace: fills `lower`, `upper` and `pivot` from `in`.
+void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)  // all three outputs are overwritten
+{
+    dim4 iDims = in.dims();
+    int M = iDims[0];  // extent of `in` along dim 0
+    int N = iDims[1];  // extent of `in` along dim 1
+
+    Array<T> in_copy = copyArray<T>(in);  // factorization is in place, so work on a copy of the const input
+    pivot = lu_inplace(in_copy);  // convert_pivot is left at the default declared in cpu_lu.hpp (true)
+
+    // SPLIT into lower and upper
+    dim4 ldims(M, min(M, N));  // lower is M x min(M, N)
+    dim4 udims(min(M, N), N);  // upper is min(M, N) x N
+    lower = createEmptyArray<T>(ldims);
+    upper = createEmptyArray<T>(udims);
+
+    lu_split<T>(lower, upper, in_copy);  // every element of lower/upper is written by lu_split
+}
+
+template<typename T>  // Runs getrf directly on `in`'s storage and returns the pivot array.
+Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)  // convert_pivot: pass the result through convertPivot() before returning
+{
+    dim4 iDims = in.dims();
+    int M = iDims[0];  // extent of `in` along dim 0
+    int N = iDims[1];  // extent of `in` along dim 1
+
+    Array<int> pivot = createEmptyArray<int>(af::dim4(min(M, N), 1, 1, 1));  // min(M, N) pivot entries
+
+    T *inPtr = getMappedPtr<T>(in.get());  // host pointers handed to the LAPACK routine
+    int *pivotPtr = getMappedPtr<int>(pivot.get());
+
+    getrf_func<T>()(AF_LAPACK_COL_MAJOR, M, N,  // NOTE(review): the int returned by getrf is discarded
+                    inPtr, in.strides()[1],     // stride along dim 1 is passed as the fifth argument
+                    pivotPtr);
+
+    unmapPtr(in.get(), inPtr);
+    unmapPtr(pivot.get(), pivotPtr);
+
+    if(convert_pivot) convertPivot(pivot, M);  // converted pivot has length M, not min(M, N)
+
+    return pivot;
+}
+
+#define INSTANTIATE_LU(T)                                                                           \
+    template Array<int> lu_inplace<T>(Array<T> &in, const bool convert_pivot);                      \
+    template void lu<T>(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);  // explicit instantiations of lu_inplace<T> and lu<T>
+
+INSTANTIATE_LU(float)    // the templates are defined in this .cpp, so each element type is instantiated here
+INSTANTIATE_LU(cfloat)
+INSTANTIATE_LU(double)
+INSTANTIATE_LU(cdouble)
+
+}
+}
diff --git a/src/backend/opencl/cpu/cpu_lu.hpp b/src/backend/opencl/cpu/cpu_lu.hpp
new file mode 100644
index 0000000..6c038f2
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_lu.hpp
@@ -0,0 +1,22 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <Array.hpp>
+
+namespace opencl  // NOTE(review): no include guard or #pragma once is visible in this header's hunk
+{
+namespace cpu  // fallback routines called from opencl::lu / opencl::lu_inplace when OpenCLCPUOffload() is true
+{
+    template<typename T>  // Writes the lower factor, upper factor and pivot of `in` into the three output arrays.
+    void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);  // defined in cpu_lu.cpp
+
+    template<typename T>  // Factorizes `in` in place and returns the pivot array.
+    Array<int> lu_inplace(Array<T> &in, const bool convert_pivot = true);  // convert_pivot: run the pivot through convertPivot() first
+}
+}
diff --git a/src/backend/opencl/lu.cpp b/src/backend/opencl/lu.cpp
index 2d94d4d..0bc6bd5 100644
--- a/src/backend/opencl/lu.cpp
+++ b/src/backend/opencl/lu.cpp
@@ -14,7 +14,9 @@
 #include <kernel/lu_split.hpp>
 #include <copy.hpp>
 #include <blas.hpp>
+#include <platform.hpp>
 #include <magma/magma.h>
+#include <cpu/cpu_lu.hpp>
 
 namespace opencl
 {
@@ -41,8 +43,11 @@ Array<int> convertPivot(int *ipiv, int in_sz, int out_sz)
 template<typename T>
 void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)
 {
-
     try {
+        if(OpenCLCPUOffload()) {
+            return cpu::lu(lower, upper, pivot, in);
+        }
+
         dim4 iDims = in.dims();
         int M = iDims[0];
         int N = iDims[1];
@@ -67,6 +72,10 @@ template<typename T>
 Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
 {
     try {
+        if(OpenCLCPUOffload()) {
+            return cpu::lu_inplace(in, convert_pivot);
+        }
+
         initBlas();
         dim4 iDims = in.dims();
         int M = iDims[0];

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list