[arrayfire] 171/284: Add OpenCL-CPU fallback for LU
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Sun Feb 7 18:59:30 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.
commit 45abbc35741f5e04a6c9655b30bd5dc3f7b47b46
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Fri Jan 8 11:02:11 2016 -0500
Add OpenCL-CPU fallback for LU
---
src/backend/opencl/cpu/cpu_lapack_helper.hpp | 35 ++++++
src/backend/opencl/cpu/cpu_lu.cpp | 178 +++++++++++++++++++++++++++
src/backend/opencl/cpu/cpu_lu.hpp | 22 ++++
src/backend/opencl/lu.cpp | 11 +-
4 files changed, 245 insertions(+), 1 deletion(-)
diff --git a/src/backend/opencl/cpu/cpu_lapack_helper.hpp b/src/backend/opencl/cpu/cpu_lapack_helper.hpp
new file mode 100644
index 0000000..174022e
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_lapack_helper.hpp
@@ -0,0 +1,35 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#ifndef AFCPU_LAPACK
+#define AFCPU_LAPACK
+// Picks the LAPACKE header for the host LAPACK calls and defines the macros used to name them.
+#include <types.hpp>
+// Defined ahead of the LAPACKE include below; presumably the header adopts these complex types -- confirm.
+#define lapack_complex_float opencl::cfloat
+#define lapack_complex_double opencl::cdouble
+#define LAPACK_PREFIX LAPACKE_
+#define ORDER_TYPE int
+#define AF_LAPACK_COL_MAJOR LAPACK_COL_MAJOR
+#define LAPACK_NAME(fn) LAPACKE_##fn
+// LAPACK_NAME(x) pastes to LAPACKE_x. Apple builds replace the column-major constant with 0.
+#ifdef __APPLE__
+#include <Accelerate/Accelerate.h>
+#include <lapacke.hpp>
+#undef AF_LAPACK_COL_MAJOR
+#define AF_LAPACK_COL_MAJOR 0
+#else
+#ifdef USE_MKL
+#include<mkl_lapacke.h>
+#else // NETLIB LAPACKE
+#include<lapacke.h>
+#endif // USE_MKL
+#endif // __APPLE__
+
+#endif // AFCPU_LAPACK
diff --git a/src/backend/opencl/cpu/cpu_lu.cpp b/src/backend/opencl/cpu/cpu_lu.cpp
new file mode 100644
index 0000000..f415cb3
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_lu.cpp
@@ -0,0 +1,178 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <cpu/cpu_lapack_helper.hpp>
+#include <cpu/cpu_lu.hpp>
+#include <err_common.hpp>
+
+#include <af/dim4.hpp>
+#include <handle.hpp>
+#include <iostream>
+#include <cassert>
+
+#include <range.hpp>
+
+namespace opencl
+{
+namespace cpu
+{
+
+template<typename T> // function-pointer type through which lu_inplace calls getrf
+using getrf_func_def = int (*)(ORDER_TYPE, int, int, // lu_inplace passes: matrix order, M, N,
+ T*, int, // the mapped matrix data and its dimension-1 stride,
+ int*); // and the mapped pivot buffer
+// LU_FUNC_DEF(FUNC) declares the primary template FUNC_func<T>(), which returns a FUNC_func_def<T>.
+#define LU_FUNC_DEF( FUNC ) \
+template<typename T> FUNC##_func_def<T> FUNC##_func();
+// LU_FUNC(FUNC, TYPE, PREFIX) specialises FUNC_func<TYPE>() to return the address of
+// LAPACK_NAME(PREFIX##FUNC), e.g. LAPACK_NAME(sgetrf) for (getrf, float, s).
+#define LU_FUNC( FUNC, TYPE, PREFIX ) \
+template<> FUNC##_func_def<TYPE> FUNC##_func<TYPE>() \
+{ return & LAPACK_NAME(PREFIX##FUNC); }
+// getrf lookup for float, double, cfloat and cdouble (LAPACK prefixes s, d, c, z).
+LU_FUNC_DEF( getrf )
+LU_FUNC(getrf , float , s)
+LU_FUNC(getrf , double , d)
+LU_FUNC(getrf , cfloat , c)
+LU_FUNC(getrf , cdouble, z)
+
+// Splits the packed factorisation in `in` into `lower` (entries below the
+// diagonal plus a unit diagonal) and `upper` (diagonal and above); every
+// other element that lies inside an output array is set to zero.
+template<typename T>
+void lu_split(Array<T> &lower, Array<T> &upper, const Array<T> &in)
+{
+    T *lowPtr = getMappedPtr<T>(lower.get());
+    T *uppPtr = getMappedPtr<T>(upper.get());
+    T *srcPtr = getMappedPtr<T>(in.get());
+
+    dim4 lowDims = lower.dims();
+    dim4 uppDims = upper.dims();
+    dim4 srcDims = in.dims();
+
+    dim4 lowStr = lower.strides();
+    dim4 uppStr = upper.strides();
+    dim4 srcStr = in.strides();
+
+    // Visit every element of `in`; dimension 0 is indexed with unit
+    // stride in all three buffers.
+    for (dim_t w = 0; w < srcDims[3]; ++w) {
+        for (dim_t z = 0; z < srcDims[2]; ++z) {
+            // Offsets of the current 2D slice within each buffer
+            const dim_t lowSlice = w * lowStr[3] + z * lowStr[2];
+            const dim_t uppSlice = w * uppStr[3] + z * uppStr[2];
+            const dim_t srcSlice = w * srcStr[3] + z * srcStr[2];
+
+            for (dim_t col = 0; col < srcDims[1]; ++col) {
+                // Skip writes to `lower` past its last column
+                const bool colInLower = col < lowDims[1];
+                const dim_t lowCol = lowSlice + col * lowStr[1];
+                const dim_t uppCol = uppSlice + col * uppStr[1];
+                const dim_t srcCol = srcSlice + col * srcStr[1];
+
+                for (dim_t row = 0; row < srcDims[0]; ++row) {
+                    // Skip writes to `upper` past its last row
+                    const bool rowInUpper = row < uppDims[0];
+                    const dim_t lowIdx = lowCol + row;
+                    const dim_t uppIdx = uppCol + row;
+                    const dim_t srcIdx = srcCol + row;
+
+                    if (row > col) {
+                        // Strictly below the diagonal
+                        if (colInLower) lowPtr[lowIdx] = srcPtr[srcIdx];
+                        if (rowInUpper) uppPtr[uppIdx] = scalar<T>(0);
+                    } else {
+                        // On the diagonal (unit entry in lower) or above it
+                        if (colInLower) lowPtr[lowIdx] = (row == col) ? scalar<T>(1.0) : scalar<T>(0);
+                        if (rowInUpper) uppPtr[uppIdx] = srcPtr[srcIdx];
+                    }
+                }
+            }
+        }
+    }
+
+    unmapPtr(lower.get(), lowPtr);
+    unmapPtr(upper.get(), uppPtr);
+    unmapPtr(in.get(), srcPtr);
+}
+
+// Turns the 1-based interchange list in `pivot` into the array obtained by
+// applying those swaps, in order, to range<int>(dim4(out_sz), 0).
+void convertPivot(Array<int> &pivot, int out_sz)
+{
+    Array<int> perm = range<int>(dim4(out_sz), 0);  // runs on OpenCL
+
+    int *swaps   = getMappedPtr<int>(pivot.get());
+    int *permPtr = getMappedPtr<int>(perm.get());
+
+    const int count = (int)pivot.dims()[0];
+    for (int k = 0; k < count; ++k) {
+        const int target = swaps[k] - 1;  // pivot entries are 1-indexed
+        std::swap(permPtr[k], permPtr[target]);
+    }
+
+    unmapPtr(pivot.get(), swaps);
+    unmapPtr(perm.get(), permPtr);
+    pivot = perm;
+}
+
+// Out-of-place LU: factorises a copy of `in`, assigns the pivot array to
+// `pivot` and splits the result into `lower` (M x min(M,N)) and `upper` (min(M,N) x N).
+template<typename T>
+void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)
+{
+    dim4 inDims = in.dims();
+    int rows = inDims[0];
+    int cols = inDims[1];
+
+    // lu_inplace overwrites its argument, so work on a copy of the input
+    Array<T> packed = copyArray<T>(in);
+    pivot = lu_inplace(packed);
+
+    int minDim = min(rows, cols);
+    lower = createEmptyArray<T>(dim4(rows, minDim));
+    upper = createEmptyArray<T>(dim4(minDim, cols));
+    lu_split<T>(lower, upper, packed);
+}
+
+// In-place LU of `in` through the LAPACKE getrf routine selected for T.
+// Returns the pivot array, run through convertPivot() when convert_pivot is set.
+template<typename T>
+Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
+{
+    dim4 dims = in.dims();
+    int rows = dims[0];
+    int cols = dims[1];
+
+    Array<int> pivot = createEmptyArray<int>(af::dim4(min(rows, cols), 1, 1, 1));
+
+    T   *matrix  = getMappedPtr<T>(in.get());
+    int *indices = getMappedPtr<int>(pivot.get());
+
+    // Stride of dimension 1 serves as the leading dimension; the returned status is not inspected.
+    auto getrf = getrf_func<T>();
+    getrf(AF_LAPACK_COL_MAJOR, rows, cols, matrix, in.strides()[1], indices);
+
+    unmapPtr(in.get(), matrix);
+    unmapPtr(pivot.get(), indices);
+    if (convert_pivot) { convertPivot(pivot, rows); }
+    return pivot;
+}
+
+#define INSTANTIATE_LU(T) \
+ template Array<int> lu_inplace<T>(Array<T> &in, const bool convert_pivot); \
+ template void lu<T>(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);
+// Explicitly instantiate lu_inplace<T> and lu<T> for the four element types with a getrf specialisation above.
+INSTANTIATE_LU(float)
+INSTANTIATE_LU(cfloat)
+INSTANTIATE_LU(double)
+INSTANTIATE_LU(cdouble)
+
+}
+}
diff --git a/src/backend/opencl/cpu/cpu_lu.hpp b/src/backend/opencl/cpu/cpu_lu.hpp
new file mode 100644
index 0000000..6c038f2
--- /dev/null
+++ b/src/backend/opencl/cpu/cpu_lu.hpp
@@ -0,0 +1,22 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+#include <Array.hpp>
+namespace opencl {
+namespace cpu {
+    // Host-side LU of `in`; assigns the lower/upper factors and the pivot array.
+    template<typename T>
+    void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in);
+    // Host-side LU that overwrites `in`; returns the pivot array, which is passed
+    // through convertPivot() unless convert_pivot is false. Guarded: default arg must not repeat.
+    template<typename T>
+    Array<int> lu_inplace(Array<T> &in, const bool convert_pivot = true);
+}
+}
diff --git a/src/backend/opencl/lu.cpp b/src/backend/opencl/lu.cpp
index 2d94d4d..0bc6bd5 100644
--- a/src/backend/opencl/lu.cpp
+++ b/src/backend/opencl/lu.cpp
@@ -14,7 +14,9 @@
#include <kernel/lu_split.hpp>
#include <copy.hpp>
#include <blas.hpp>
+#include <platform.hpp>
#include <magma/magma.h>
+#include <cpu/cpu_lu.hpp>
namespace opencl
{
@@ -41,8 +43,11 @@ Array<int> convertPivot(int *ipiv, int in_sz, int out_sz)
template<typename T>
void lu(Array<T> &lower, Array<T> &upper, Array<int> &pivot, const Array<T> &in)
{
-
try {
+ if(OpenCLCPUOffload()) {
+ return cpu::lu(lower, upper, pivot, in);
+ }
+
dim4 iDims = in.dims();
int M = iDims[0];
int N = iDims[1];
@@ -67,6 +72,10 @@ template<typename T>
Array<int> lu_inplace(Array<T> &in, const bool convert_pivot)
{
try {
+ if(OpenCLCPUOffload()) {
+ return cpu::lu_inplace(in, convert_pivot);
+ }
+
initBlas();
dim4 iDims = in.dims();
int M = iDims[0];
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list