[arrayfire] 115/248: Added CPU fallback for CUDA SVD when CUDA older than 7

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Nov 17 15:54:11 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit 55fdae4301bf01400bc36cf213f5a248fbf30765
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Thu Oct 8 19:48:19 2015 -0400

    Added CPU fallback for CUDA SVD when CUDA older than 7
---
 src/backend/cuda/cpu_lapack/cpu_svd.cpp | 153 ++++++++++++++++++++++++++++++++
 src/backend/cuda/cpu_lapack/cpu_svd.hpp |  22 +++++
 src/backend/cuda/svd.cu                 |  34 ++++++-
 3 files changed, 206 insertions(+), 3 deletions(-)

diff --git a/src/backend/cuda/cpu_lapack/cpu_svd.cpp b/src/backend/cuda/cpu_lapack/cpu_svd.cpp
new file mode 100644
index 0000000..eb71606
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_svd.cpp
@@ -0,0 +1,153 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#if defined(WITH_CPU_LINEAR_ALGEBRA)
+#include <cpu_lapack/cpu_svd.hpp>
+
+#include <Array.hpp>
+#include <svd.hpp>
+#include <err_common.hpp>
+#include <copy.hpp>
+
+#include "lapack_helper.hpp"
+
+namespace cuda
+{
+namespace cpu
+{
+
+#define SVD_FUNC_DEF( FUNC )                                            \
+    template<typename T,typename Tr> svd_func_def<T, Tr> svd_func();  // primary template, declaration only; FUNC is not used
+// SVD_FUNC: specialise svd_func<T, Tr>() to return the address of LAPACK_NAME(PREFIX##FUNC).
+#define SVD_FUNC( FUNC, T, Tr, PREFIX )                     \
+    template<> svd_func_def<T, Tr>     svd_func<T, Tr>()    \
+    { return & LAPACK_NAME(PREFIX##FUNC); }
+
+#if defined(USE_MKL) || defined(__APPLE__)  // these builds dispatch to the gesdd family
+    // Function-pointer type of the gesdd entry points: a single job flag (jobz).
+    template<typename T, typename Tr>
+    using svd_func_def = int (*)(ORDER_TYPE,
+                                 char jobz,
+                                 int m, int n,
+                                 T* in, int ldin,
+                                 Tr* s,
+                                 T* u, int ldu,
+                                 T* vt, int ldvt);
+
+    SVD_FUNC_DEF( gesdd )
+    SVD_FUNC(gesdd, float  , float , s)   // T = float,   Tr = float
+    SVD_FUNC(gesdd, double , double, d)   // T = double,  Tr = double
+    SVD_FUNC(gesdd, cfloat , float , c)   // T = cfloat,  Tr = float
+    SVD_FUNC(gesdd, cdouble, double, z)   // T = cdouble, Tr = double
+
+#else   // All other builds use gesvd: Atlas causes memory freeing issues with gesdd
+    // Function-pointer type of the gesvd entry points: separate jobu/jobvt flags and a trailing superb array.
+    template<typename T, typename Tr>
+    using svd_func_def = int (*)(ORDER_TYPE,
+                                 char jobu, char jobvt,
+                                 int m, int n,
+                                 T* in, int ldin,
+                                 Tr* s,
+                                 T* u, int ldu,
+                                 T* vt, int ldvt,
+                                 Tr *superb);
+
+    SVD_FUNC_DEF( gesvd )
+    SVD_FUNC(gesvd, float  , float , s)   // T = float,   Tr = float
+    SVD_FUNC(gesvd, double , double, d)   // T = double,  Tr = double
+    SVD_FUNC(gesvd, cfloat , float , c)   // T = cfloat,  Tr = float
+    SVD_FUNC(gesvd, cdouble, double, z)   // T = cdouble, Tr = double
+
+#endif  // USE_MKL || __APPLE__
+
+    // Host-side SVD shared by svd() and svdInPlace(): stages every array in
+    // buffers from pinnedAlloc, calls the LAPACK routine selected by
+    // svd_func<T, Tr>() and writes the buffers back with writeHostDataArray.
+    //
+    //   T / Tr   : matrix element type / element type of s (e.g. cfloat / float)
+    //   s, u, vt : output arrays, already sized by the caller
+    //   in       : input matrix; dims()[0] and dims()[1] are passed as M and N
+    //   inOut    : if non-null, also receives the input buffer as LAPACK left it
+    //
+    // NOTE(review): the int status returned by LAPACK is still discarded.
+    template <typename T, typename Tr>
+    static void svdOnHost(Array<Tr> &s, Array<T> &u, Array<T> &vt,
+                          const Array<T> &in, Array<T> *inOut)
+    {
+        dim4 iDims = in.dims();
+        int M = iDims[0];
+        int N = iDims[1];
+
+        Tr *sPtr = pinnedAlloc<Tr>(s.elements());
+        T  *uPtr = pinnedAlloc<T >(u.elements());
+        T  *vPtr = pinnedAlloc<T >(vt.elements());
+        T  *iPtr = pinnedAlloc<T >(in.elements());
+
+        // NOTE(review): s, u and vt are meant to be empty outputs, so the first
+        // three copies look redundant - confirm LAPACK overwrites them fully
+        // before removing them.
+        copyData(sPtr, s);
+        copyData(uPtr, u);
+        copyData(vPtr, vt);
+        copyData(iPtr, in);
+
+        // Each leading dimension is the array's stride along its second axis.
+#if defined(USE_MKL) || defined(__APPLE__)
+        svd_func<T, Tr>()(AF_LAPACK_COL_MAJOR, 'A', M, N, iPtr, in.strides()[1],
+                          sPtr, uPtr, u.strides()[1], vPtr, vt.strides()[1]);
+#else
+        // data() rather than &superb[0]: indexing an empty vector (M or N == 0)
+        // is undefined behaviour.
+        std::vector<Tr> superb(std::min(M, N));
+        svd_func<T, Tr>()(AF_LAPACK_COL_MAJOR, 'A', 'A', M, N, iPtr, in.strides()[1],
+                          sPtr, uPtr, u.strides()[1], vPtr, vt.strides()[1], superb.data());
+#endif
+        // Results go back in the original order: s, u, vt, then the input.
+        writeHostDataArray(s , sPtr, s.elements()  * sizeof(Tr));
+        writeHostDataArray(u , uPtr, u.elements()  * sizeof(T ));
+        writeHostDataArray(vt, vPtr, vt.elements() * sizeof(T ));
+        if (inOut) {
+            writeHostDataArray(*inOut, iPtr, inOut->elements() * sizeof(T));
+        }
+
+        // Release the staging buffers.
+        pinnedFree(sPtr);
+        pinnedFree(uPtr);
+        pinnedFree(vPtr);
+        pinnedFree(iPtr);
+    }
+
+    // In-place variant: besides s, u and vt, the buffer LAPACK worked on is
+    // written back into `in`.
+    template <typename T, typename Tr>
+    void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
+    {
+        svdOnHost<T, Tr>(s, u, vt, in, &in);
+    }
+
+    // Out-of-place variant: `in` is only read, never written back;
+    // s, u and vt receive the result.
+    template <typename T, typename Tr>
+    void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
+    {
+        svdOnHost<T, Tr>(s, u, vt, in, nullptr);
+    }
+
+#define INSTANTIATE_SVD(T, Tr)                                          \
+    template void svd<T, Tr>(Array<Tr> & s, Array<T> & u, Array<T> & vt, const Array<T> &in); \
+    template void svdInPlace<T, Tr>(Array<Tr> & s, Array<T> & u, Array<T> & vt, Array<T> &in);  // explicit instantiation of both entry points
+    // Supported pairs: T is the matrix element type, Tr the element type of s.
+    INSTANTIATE_SVD(float  , float )
+    INSTANTIATE_SVD(double , double)
+    INSTANTIATE_SVD(cfloat , float )
+    INSTANTIATE_SVD(cdouble, double)
+}
+}
+
+#endif
diff --git a/src/backend/cuda/cpu_lapack/cpu_svd.hpp b/src/backend/cuda/cpu_lapack/cpu_svd.hpp
new file mode 100644
index 0000000..f5fc1a8
--- /dev/null
+++ b/src/backend/cuda/cpu_lapack/cpu_svd.hpp
@@ -0,0 +1,22 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+#pragma once
+#include <Array.hpp>
+
+namespace cuda
+{
+namespace cpu   // CPU (LAPACK) fallback entry points for the CUDA backend
+{
+    template<typename T, typename Tr>   // fills s, u and vt; `in` is read-only
+    void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in);
+
+    template<typename T, typename Tr>   // same outputs, but `in` is writable and may be modified
+    void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in);
+}
+}
diff --git a/src/backend/cuda/svd.cu b/src/backend/cuda/svd.cu
index 37ffa78..e07c1f0 100644
--- a/src/backend/cuda/svd.cu
+++ b/src/backend/cuda/svd.cu
@@ -17,13 +17,12 @@
 #include <math.hpp>
 #include <err_common.hpp>
 
-namespace cuda
-{
-
 #if defined(WITH_CUDA_LINEAR_ALGEBRA)
 
 #include <cusolverDnManager.hpp>
 
+namespace cuda
+{
     using cusolver::getDnHandle;
 
     template<typename T>
@@ -124,9 +123,33 @@ SVD_SPECIALIZE(cdouble, double, Z);
             transpose_inplace(u, true);
         }
     }
+}
+#elif defined(WITH_CPU_LINEAR_ALGEBRA)
+
+#include <cpu_lapack/cpu_svd.hpp>
+
+namespace cuda
+{
+
+template<typename T, typename Tr>   // WITH_CPU_LINEAR_ALGEBRA build: forward to the host LAPACK path
+void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
+{
+    cpu::svd<T, Tr>(s, u, vt, in);
+}
+
+template<typename T, typename Tr>   // WITH_CPU_LINEAR_ALGEBRA build: forward to the host LAPACK path
+void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
+{
+    cpu::svdInPlace<T, Tr>(s, u, vt, in);
+}
+
+}
 
 #else
 
+namespace cuda
+{
+
 template<typename T, typename Tr>
 void svd(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in)
 {
@@ -141,8 +164,13 @@ void svdInPlace(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in)
              AF_ERR_NOT_CONFIGURED);
 }
 
+}
+
 #endif
 
+namespace cuda
+{
+
 #define INSTANTIATE(T, Tr)                                              \
     template void svd<T, Tr>(Array<Tr> &s, Array<T> &u, Array<T> &vt, const Array<T> &in); \
     template void svdInPlace<T, Tr>(Array<Tr> &s, Array<T> &u, Array<T> &vt, Array<T> &in);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list