[arrayfire] 115/248: Added CPU fallback for CUDA SVD when CUDA older than 7
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:54:11 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.
commit 55fdae4301bf01400bc36cf213f5a248fbf30765
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date: Thu Oct 8 19:48:19 2015 -0400
Added CPU fallback for CUDA SVD when CUDA older than 7
---
src/backend/cuda/cpu_lapack/cpu_svd.cpp | 153 ++++++++++++++++++++++++++++++++
src/backend/cuda/cpu_lapack/cpu_svd.hpp | 22 +++++
src/backend/cuda/svd.cu | 34 ++++++-
3 files changed, 206 insertions(+), 3 deletions(-)
diff --git a/src/backend/cuda/cpu_lapack/cpu_svd.cpp b/src/backend/cuda/cpu_lapack/cpu_svd.cpp
new file mode 100644
index 0000000..eb71606
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_svd.cpp
@@ -0,0 +1,153 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#if defined(WITH_CPU_LINEAR_ALGEBRA)
+#include <cpu_lapack/cpu_svd.hpp>
+
+#include <Array.hpp>
+#include <svd.hpp>
+#include <err_common.hpp>
+#include <copy.hpp>
+
+#include <algorithm>
+#include <vector>
+
+#include "lapack_helper.hpp"
+
+namespace cuda
+{
+namespace cpu
+{
+ // SVD_FUNC_DEF declares the primary template svd_func<T, Tr>(); note that its FUNC argument is not used in the expansion.
+#define SVD_FUNC_DEF( FUNC ) \
+ template<typename T,typename Tr> svd_func_def<T, Tr> svd_func();
+ // SVD_FUNC specializes svd_func<T, Tr>() so that it returns the address of LAPACK_NAME(<PREFIX><FUNC>).
+#define SVD_FUNC( FUNC, T, Tr, PREFIX ) \
+ template<> svd_func_def<T, Tr> svd_func<T, Tr>() \
+ { return & LAPACK_NAME(PREFIX##FUNC); }
+ // The LAPACK driver is picked at compile time: gesdd when USE_MKL or __APPLE__ is defined, gesvd otherwise.
+#if defined(USE_MKL) || defined(__APPLE__)
+ // Pointer type for the gesdd-style call: a single job flag (jobz) and no trailing work array.
+ template<typename T, typename Tr>
+ using svd_func_def = int (*)(ORDER_TYPE,
+ char jobz,
+ int m, int n,
+ T* in, int ldin,
+ Tr* s,
+ T* u, int ldu,
+ T* vt, int ldvt);
+ // One specialization per element type T / singular-value type Tr; s, d, c, z is the routine-name prefix.
+ SVD_FUNC_DEF( gesdd )
+ SVD_FUNC(gesdd, float , float , s)
+ SVD_FUNC(gesdd, double , double, d)
+ SVD_FUNC(gesdd, cfloat , float , c)
+ SVD_FUNC(gesdd, cdouble, double, z)
+
+#else // Atlas causes memory freeing issues with using gesdd
+ // Pointer type for the gesvd-style call: separate jobu / jobvt flags plus the trailing superb array.
+ template<typename T, typename Tr>
+ using svd_func_def = int (*)(ORDER_TYPE,
+ char jobu, char jobvt,
+ int m, int n,
+ T* in, int ldin,
+ Tr* s,
+ T* u, int ldu,
+ T* vt, int ldvt,
+ Tr *superb);
+ // Same four T / Tr pairings as in the gesdd branch.
+ SVD_FUNC_DEF( gesvd )
+ SVD_FUNC(gesvd, float , float , s)
+ SVD_FUNC(gesvd, double , double, d)
+ SVD_FUNC(gesvd, cfloat , float , c)
+ SVD_FUNC(gesvd, cdouble, double, z)
+
+#endif
+
+ // Host-side SVD shared by svd() and svdInPlace().
+ //
+ // Copies `in` from the device into a pinned host buffer, factorizes it with
+ // the LAPACK routine returned by svd_func<T, Tr>() (job 'A' for U and V^T)
+ // and uploads the results into `s`, `u` and `vt`.
+ //
+ //   s, u, vt - output arrays, already sized by the caller. Their previous
+ //              contents are not read (the callers treat them as empty), so
+ //              no device-to-host copy is made for them.
+ //   in       - input matrix; dims()[0] rows by dims()[1] columns.
+ //   inOut    - when non-null, receives the host working copy of the input
+ //              after LAPACK has run on it (used by svdInPlace()).
+ //
+ // Raises an AF_ERR_INTERNAL error through AF_ERROR when LAPACK returns a
+ // non-zero status; in that case none of the output arrays is written.
+ template <typename T, typename Tr>
+ static void svdOnHost(Array<Tr> &s, Array<T> &u, Array<T> &vt,
+                       const Array<T> &in, Array<T> *inOut)
+ {
+     const dim4 iDims = in.dims();
+     const int M = iDims[0];
+     const int N = iDims[1];
+
+     Tr *sPtr = pinnedAlloc<Tr>(s.elements());
+     T  *uPtr = pinnedAlloc<T >(u.elements());
+     T  *vPtr = pinnedAlloc<T >(vt.elements());
+     T  *iPtr = pinnedAlloc<T >(in.elements());
+
+     // Only the input carries meaningful device data; S, U and Vt are pure
+     // outputs and are produced by LAPACK below.
+     copyData(iPtr, in);
+
+#if defined(USE_MKL) || defined(__APPLE__)
+     const int info = svd_func<T, Tr>()(AF_LAPACK_COL_MAJOR, 'A', M, N,
+                                        iPtr, in.strides()[1],
+                                        sPtr,
+                                        uPtr, u.strides()[1],
+                                        vPtr, vt.strides()[1]);
+#else
+     // Keep at least one element so that &superb[0] is always a valid
+     // pointer, even when min(M, N) is zero.
+     std::vector<Tr> superb(std::max(1, std::min(M, N)));
+     const int info = svd_func<T, Tr>()(AF_LAPACK_COL_MAJOR, 'A', 'A', M, N,
+                                        iPtr, in.strides()[1],
+                                        sPtr,
+                                        uPtr, u.strides()[1],
+                                        vPtr, vt.strides()[1],
+                                        &superb[0]);
+#endif
+
+     // Upload results only when LAPACK reported success.
+     if (info == 0) {
+         writeHostDataArray(s , sPtr, s.elements()  * sizeof(Tr));
+         writeHostDataArray(u , uPtr, u.elements()  * sizeof(T ));
+         writeHostDataArray(vt, vPtr, vt.elements() * sizeof(T ));
+         if (inOut) {
+             writeHostDataArray(*inOut, iPtr, inOut->elements() * sizeof(T ));
+         }
+     }
+
+     // Release the pinned buffers before reporting a failure so that the
+     // error path does not leak them.
+     pinnedFree(sPtr);
+     pinnedFree(uPtr);
+     pinnedFree(vPtr);
+     pinnedFree(iPtr);
+
+     if (info != 0) {
+         AF_ERROR("LAPACK SVD routine returned a non-zero status", AF_ERR_INTERNAL);
+     }
+ }
+
+ // SVD of `in` computed on the host. `in` is also overwritten with the
+ // working copy LAPACK operated on, so its original contents are lost.
+ template <typename T, typename Tr>
+ void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
+ {
+     svdOnHost<T, Tr>(s, u, vt, in, &in);
+ }
+
+ // SVD of `in` computed on the host. `in` itself is left unchanged; LAPACK
+ // works on a temporary host copy that is discarded afterwards.
+ template <typename T, typename Tr>
+ void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
+ {
+     svdOnHost<T, Tr>(s, u, vt, in, nullptr);
+ }
+ // Explicitly instantiates svd() and svdInPlace() for one element type T and its singular-value type Tr.
+#define INSTANTIATE_SVD(T, Tr) \
+ template void svd<T, Tr>(Array<Tr> & s, Array<T> & u, Array<T> & vt, const Array<T> &in); \
+ template void svdInPlace<T, Tr>(Array<Tr> & s, Array<T> & u, Array<T> & vt, Array<T> &in);
+ // Four instantiations: float and double with matching Tr, cfloat and cdouble with float and double Tr.
+ INSTANTIATE_SVD(float , float )
+ INSTANTIATE_SVD(double , double)
+ INSTANTIATE_SVD(cfloat , float )
+ INSTANTIATE_SVD(cdouble, double)
+}
+}
+
+#endif
diff --git a/src/backend/cuda/cpu_lapack/cpu_svd.hpp b/src/backend/cuda/cpu_lapack/cpu_svd.hpp
new file mode 100644
index 0000000..f5fc1a8
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_svd.hpp
@@ -0,0 +1,22 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#pragma once
+
+#include <Array.hpp>
+
+namespace cuda
+{
+namespace cpu
+{
+ // Host (LAPACK) singular value decomposition used by the CUDA backend.
+ // Fills `s`, `u` and `vt` from the factorization of `in`; `in` is taken by
+ // const reference and is not written to.
+ // Defined and explicitly instantiated in cpu_svd.cpp for
+ // <float, float>, <double, double>, <cfloat, float> and <cdouble, double>.
+ template<typename T, typename Tr>
+ void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in);
+
+ // Same factorization as svd(), but `in` is also written: it receives the
+ // host working copy that LAPACK operated on, so its original contents
+ // should be treated as lost.
+ template<typename T, typename Tr>
+ void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in);
+}
+}
diff --git a/src/backend/cuda/svd.cu b/src/backend/cuda/svd.cu
index 37ffa78..e07c1f0 100644
--- a/src/backend/cuda/svd.cu
+++ b/src/backend/cuda/svd.cu
@@ -17,13 +17,12 @@
#include <math.hpp>
#include <err_common.hpp>
-namespace cuda
-{
-
#if defined(WITH_CUDA_LINEAR_ALGEBRA)
#include <cusolverDnManager.hpp>
+namespace cuda
+{
using cusolver::getDnHandle;
template<typename T>
@@ -124,9 +123,33 @@ SVD_SPECIALIZE(cdouble, double, Z);
transpose_inplace(u, true);
}
}
+}
+#elif defined(WITH_CPU_LINEAR_ALGEBRA)
+
+#include <cpu_lapack/cpu_svd.hpp>
+
+namespace cuda
+{
+
+// Variant compiled when WITH_CUDA_LINEAR_ALGEBRA is not defined but
+// WITH_CPU_LINEAR_ALGEBRA is: the factorization is delegated unchanged to
+// cpu::svd, with all four arguments forwarded as received.
+template<typename T, typename Tr>
+void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
+{
+    // cpu::svd returns void, so a plain call is equivalent to `return cpu::svd(...)`.
+    cpu::svd<T, Tr>(s, u, vt, in);
+}
+
+// Variant compiled when WITH_CUDA_LINEAR_ALGEBRA is not defined but
+// WITH_CPU_LINEAR_ALGEBRA is: the in-place factorization is delegated
+// unchanged to cpu::svdInPlace, which may write to `in`.
+template<typename T, typename Tr>
+void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
+{
+    // cpu::svdInPlace returns void, so a plain call is equivalent to the `return` form.
+    cpu::svdInPlace<T, Tr>(s, u, vt, in);
+}
+
+}
#else
+namespace cuda
+{
+
template<typename T, typename Tr>
void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
{
@@ -141,8 +164,13 @@ void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
AF_ERR_NOT_CONFIGURED);
}
+}
+
#endif
+namespace cuda
+{
+
// Explicitly instantiates cuda::svd and cuda::svdInPlace for element type T and singular-value type Tr.
#define INSTANTIATE(T, Tr) \
template void svd<T, Tr>(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in); \
template void svdInPlace<T, Tr>(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list