[arrayfire] 74/84: Changes to opencl backend when building with openblas

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Jan 4 23:22:28 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository arrayfire.

commit 613557c090af8d4a0f84b06ad4b63ff77d2843a5
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date:   Tue Dec 29 20:37:32 2015 -0500

    Changes to opencl backend when building with openblas
---
 src/backend/opencl/magma/labrd.cpp        | 119 ++++++++++++++++--------------
 src/backend/opencl/magma/magma_cpu_blas.h |  31 +++++---
 2 files changed, 85 insertions(+), 65 deletions(-)

diff --git a/src/backend/opencl/magma/labrd.cpp b/src/backend/opencl/magma/labrd.cpp
index 0284ee8..115b48d 100644
--- a/src/backend/opencl/magma/labrd.cpp
+++ b/src/backend/opencl/magma/labrd.cpp
@@ -64,6 +64,12 @@
 
 #include <algorithm>
 
+#define cpu_blas_gemv_macro(_trans, _m, _n, _alpha, _aptr, _lda, _xptr, _incx, _beta, _yptr, _incy) \
+    cpu_blas_gemv(_trans, _m, _n,                                       \
+                  cblas_scalar(_alpha), cblas_ptr(_aptr), _lda,         \
+                  cblas_ptr(_xptr), _incx,                              \
+                  cblas_scalar(_beta), cblas_ptr(_yptr), _incy)
+
 template<typename Ty>  magma_int_t
 magma_labrd_gpu(
     magma_int_t m, magma_int_t n, magma_int_t nb,
@@ -264,15 +270,15 @@ magma_labrd_gpu(
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
             }
 
-            cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ + a_dim1], lda,
-                          &y[i__+y_dim1], ldy, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], c__1);
+            cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &a[i__ + a_dim1], lda,
+                          &y[i__+y_dim1], ldy, (&c_one), &a[i__ + i__ * a_dim1], c__1);
 
             if (is_cplx) {
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
             }
 
-            cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &x[i__ + x_dim1], ldx,
-                          &a[i__*a_dim1+1], c__1, cblas_scalar(&c_one), &a[i__+i__*a_dim1], c__1);
+            cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &x[i__ + x_dim1], ldx,
+                          &a[i__*a_dim1+1], c__1, (&c_one), &a[i__+i__*a_dim1], c__1);
 
             /* Generate reflection Q(i) to annihilate A(i+1:m,i) */
             alpha = a[i__ + i__ * a_dim1];
@@ -310,19 +316,19 @@ magma_labrd_gpu(
                                           queue, &event);
                 i__2 = m - i__ + 1;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &a[i__ + a_dim1],
-                              lda, &a[i__ + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &a[i__ + a_dim1],
+                              lda, &a[i__ + i__ * a_dim1], c__1, (&c_zero),
                               &y[i__ * y_dim1 + 1], c__1);
 
                 i__2 = n - i__;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &y[i__ + 1 +y_dim1], ldy,
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &y[i__ + 1 +y_dim1], ldy,
                               &y[i__ * y_dim1 + 1], c__1,
-                              cblas_scalar(&c_zero), f, c__1);
+                              (&c_zero), f, c__1);
                 i__2 = m - i__ + 1;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &x[i__ + x_dim1],
-                              ldx, &a[i__ + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &x[i__ + x_dim1],
+                              ldx, &a[i__ + i__ * a_dim1], c__1, (&c_zero),
                               &y[i__ * y_dim1 + 1], c__1);
 
                 // 4. Synch to make sure the result is back ----------------
@@ -330,16 +336,17 @@ magma_labrd_gpu(
 
                 if (i__3 != 0){
                     i__2 = n - i__;
-                    cpu_blas_axpy(i__2, cblas_scalar(&c_one), f,c__1, &y[i__+1+i__*y_dim1],c__1);
+                    cpu_blas_axpy(i__2, cblas_scalar(&c_one),
+                                  cblas_ptr(f),c__1, cblas_ptr(&y[i__+1+i__*y_dim1]), c__1);
                 }
 
                 i__2 = i__ - 1;
                 i__3 = n - i__;
-                cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one),
-                              &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], c__1, cblas_scalar(&c_one),
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_neg_one),
+                              &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], c__1, (&c_one),
                               &y[i__ + 1 + i__ * y_dim1], c__1);
                 i__2 = n - i__;
-                cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), &y[i__ + 1 + i__ * y_dim1], c__1);
+                cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), cblas_ptr(&y[i__ + 1 + i__ * y_dim1]), c__1);
 
                 /* Update A(i,i+1:n) */
                 i__2 = n - i__;
@@ -348,9 +355,9 @@ magma_labrd_gpu(
                     LAPACKE_CHECK(cpu_lapack_lacgv(i__,  &a[i__+a_dim1], lda));
                 }
 
-                cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__, (&c_neg_one),
                               &y[i__ + 1 + y_dim1], ldy, &a[i__ + a_dim1], lda,
-                              cblas_scalar(&c_one), &a[i__ + (i__ + 1) * a_dim1], lda);
+                              (&c_one), &a[i__ + (i__ + 1) * a_dim1], lda);
                 i__2 = i__ - 1;
                 i__3 = n - i__;
 
@@ -359,8 +366,8 @@ magma_labrd_gpu(
                     LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
                 }
 
-                cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[(i__ + 1) *
-                                                                              a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_neg_one), &a[(i__ + 1) *
+                                                                              a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, (&c_one), &a[
                                                                                   i__ + (i__ + 1) * a_dim1], lda);
                 if (is_cplx) {
                     LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
@@ -402,35 +409,35 @@ magma_labrd_gpu(
                                           queue, &event);
 
                 i__2 = n - i__;
-                cpu_blas_gemv(CblasTransParam, i__2, i__, cblas_scalar(&c_one), &y[i__ + 1 + y_dim1],
-                              ldy, &a[i__ + (i__ + 1) * a_dim1], lda, cblas_scalar(&c_zero), &x[
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__, (&c_one), &y[i__ + 1 + y_dim1],
+                              ldy, &a[i__ + (i__ + 1) * a_dim1], lda, (&c_zero), &x[
                                   i__ * x_dim1 + 1], c__1);
 
                 i__2 = m - i__;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one), &a[i__ + 1 + a_dim1], lda,
-                              &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_zero), f, c__1);
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__, (&c_neg_one), &a[i__ + 1 + a_dim1], lda,
+                              &x[i__ * x_dim1 + 1], c__1, (&c_zero), f, c__1);
                 i__2 = i__ - 1;
                 i__3 = n - i__;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_one), &a[(i__ + 1) * a_dim1 + 1],
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_one), &a[(i__ + 1) * a_dim1 + 1],
                               lda, &a[i__ + (i__ + 1) * a_dim1], lda,
-                              cblas_scalar(&c_zero), &x[i__ * x_dim1 + 1], c__1);
+                              (&c_zero), &x[i__ * x_dim1 + 1], c__1);
 
                 // 4. Synch to make sure the result is back ----------------
                 magma_event_sync(event);
 
                 if (i__!=0){
                     i__2 = m - i__;
-                    cpu_blas_axpy(i__2, cblas_scalar(&c_one), f,c__1, &x[i__+1+i__*x_dim1],c__1);
+                    cpu_blas_axpy(i__2, cblas_scalar(&c_one), cblas_ptr(f),c__1, cblas_ptr(&x[i__+1+i__*x_dim1]),c__1);
                 }
 
 
                 i__2 = m - i__;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &x[i__ + 1 +
-                                                                           x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_one), &x[
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &x[i__ + 1 +
+                                                                           x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, (&c_one), &x[
                                                                                i__ + 1 + i__ * x_dim1], c__1);
                 i__2 = m - i__;
-                cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), &x[i__ + 1 + i__ * x_dim1], c__1);
+                cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), cblas_ptr(&x[i__ + 1 + i__ * x_dim1]), c__1);
 
                 if (is_cplx) {
                     i__2 = n - i__;
@@ -455,16 +462,16 @@ magma_labrd_gpu(
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
             }
-            cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &y[i__ + y_dim1], ldy,
-                          &a[i__ + a_dim1], lda, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
+            cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &y[i__ + y_dim1], ldy,
+                          &a[i__ + a_dim1], lda, (&c_one), &a[i__ + i__ * a_dim1], lda);
             i__2 = i__ - 1;
             if (is_cplx) {
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &x[i__ + x_dim1], ldx));
             }
             i__3 = n - i__ + 1;
-            cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ * a_dim1 + 1],
-                          lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
+            cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_neg_one), &a[i__ * a_dim1 + 1],
+                          lda, &x[i__ + x_dim1], ldx, (&c_one), &a[i__ + i__ * a_dim1], lda);
             if (is_cplx) {
                 LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__ + x_dim1], ldx));
             }
@@ -510,35 +517,35 @@ magma_labrd_gpu(
 
                 i__2 = n - i__ + 1;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &y[i__ + y_dim1],
-                              ldy, &a[i__ + i__ * a_dim1], lda, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &y[i__ + y_dim1],
+                              ldy, &a[i__ + i__ * a_dim1], lda, (&c_zero),
                               &x[i__ *  x_dim1 + 1], c__1);
                 i__2 = m - i__;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
-                              &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
+                              &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], c__1, (&c_zero),
                               f, c__1);
 
                 i__2 = i__ - 1;
                 i__3 = n - i__ + 1;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_one),
-                              &a[i__ * a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_one),
+                              &a[i__ * a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, (&c_zero),
                               &x[i__ * x_dim1 + 1], c__1);
 
                 // 4. Synch to make sure the result is back ----------------
                 magma_event_sync(event);
                 if (i__2 != 0){
                     i__3 = m - i__;
-                    cpu_blas_axpy(i__3, cblas_scalar(&c_one), f,c__1, &x[i__+1+i__*x_dim1],c__1);
+                    cpu_blas_axpy(i__3, cblas_scalar(&c_one), cblas_ptr(f),c__1, cblas_ptr(&x[i__+1+i__*x_dim1]),c__1);
                 }
 
                 i__2 = m - i__;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
-                              &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_one),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
+                              &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, (&c_one),
                               &x[i__ + 1 + i__ * x_dim1], c__1);
                 i__2 = m - i__;
-                cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), &x[i__ + 1 + i__ * x_dim1], c__1);
+                cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), cblas_ptr(&x[i__ + 1 + i__ * x_dim1]), c__1);
                 i__2 = n - i__ + 1;
 
                 if (is_cplx) {
@@ -557,15 +564,15 @@ magma_labrd_gpu(
                     LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
                 }
 
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
-                              &a[i__ + 1 + a_dim1], lda, &y[i__ + y_dim1], ldy, cblas_scalar(&c_one),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
+                              &a[i__ + 1 + a_dim1], lda, &y[i__ + y_dim1], ldy, (&c_one),
                               &a[i__ + 1 + i__ * a_dim1], c__1);
                 i__2 = m - i__;
                 if (is_cplx) {
                     LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
                 }
-                cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
-                              &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], c__1, cblas_scalar(&c_one),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__, (&c_neg_one),
+                              &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], c__1, (&c_one),
                               &a[i__ + 1 + i__ * a_dim1], c__1);
 
                 /* Generate reflection Q(i) to annihilate A(i+2:m,i) */
@@ -602,33 +609,33 @@ magma_labrd_gpu(
 
                 i__2 = m - i__;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &a[i__ + 1 + a_dim1],
-                              lda, &a[i__ + 1 + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &a[i__ + 1 + a_dim1],
+                              lda, &a[i__ + 1 + i__ * a_dim1], c__1, (&c_zero),
                               &y[ i__ * y_dim1 + 1], c__1);
                 i__2 = n - i__;
                 i__3 = i__ - 1;
-                cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
+                cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
                               &y[i__ + 1 + y_dim1], ldy, &y[i__ * y_dim1 + 1], c__1,
-                              cblas_scalar(&c_zero), f, c__1);
+                              (&c_zero), f, c__1);
 
                 i__2 = m - i__;
-                cpu_blas_gemv(CblasTransParam, i__2, i__, cblas_scalar(&c_one), &x[i__ + 1 + x_dim1],
-                              ldx, &a[i__ + 1 + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+                cpu_blas_gemv_macro(CblasTransParam, i__2, i__, (&c_one), &x[i__ + 1 + x_dim1],
+                              ldx, &a[i__ + 1 + i__ * a_dim1], c__1, (&c_zero),
                               &y[i__ * y_dim1 + 1], c__1);
 
                 // 4. Synch to make sure the result is back ----------------
                 magma_event_sync(event);
                 if (i__3 != 0){
                     i__2 = n - i__;
-                    cpu_blas_axpy(i__2, cblas_scalar(&c_one), f,c__1, &y[i__+1+i__*y_dim1],c__1);
+                    cpu_blas_axpy(i__2, cblas_scalar(&c_one), cblas_ptr(f),c__1, cblas_ptr(&y[i__+1+i__*y_dim1]),c__1);
                 }
 
                 i__2 = n - i__;
-                cpu_blas_gemv(CblasTransParam, i__, i__2, cblas_scalar(&c_neg_one),
+                cpu_blas_gemv_macro(CblasTransParam, i__, i__2, (&c_neg_one),
                               &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1],
-                              c__1, cblas_scalar(&c_one), &y[i__ + 1 + i__ * y_dim1], c__1);
+                              c__1, (&c_one), &y[i__ + 1 + i__ * y_dim1], c__1);
                 i__2 = n - i__;
-                cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), &y[i__ + 1 + i__ * y_dim1], c__1);
+                cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), cblas_ptr(&y[i__ + 1 + i__ * y_dim1]), c__1);
             }
             else {
                 if (is_cplx) {
diff --git a/src/backend/opencl/magma/magma_cpu_blas.h b/src/backend/opencl/magma/magma_cpu_blas.h
index 6ae4f8f..b3cba09 100644
--- a/src/backend/opencl/magma/magma_cpu_blas.h
+++ b/src/backend/opencl/magma/magma_cpu_blas.h
@@ -41,14 +41,14 @@ typedef int blasint;
     template<typename T>                        \
     struct cpu_blas_##NAME##_func;
 
-#define CPU_BLAS_FUNC1(NAME, TYPE, X)                       \
-    template<>                                              \
-    struct cpu_blas_##NAME##_func<TYPE>                     \
-    {                                                       \
-        template<typename... Args>                          \
-            void                                            \
-            operator() (Args... args)                       \
-        { return cblas_##X##NAME(CblasColMajor, args...); } \
+#define CPU_BLAS_FUNC1(NAME, TYPE, X)                   \
+    template<>                                          \
+    struct cpu_blas_##NAME##_func<TYPE>                 \
+    {                                                   \
+        template<typename... Args>                      \
+            void                                        \
+            operator() (Args... args)                   \
+        { cblas_##X##NAME(CblasColMajor, args...); }    \
     };
 
 #define CPU_BLAS_FUNC2(NAME, TYPE, X)           \
@@ -58,7 +58,7 @@ typedef int blasint;
         template<typename... Args>              \
             void                                \
             operator() (Args... args)           \
-        { return cblas_##X##NAME(args...); }    \
+        {  cblas_##X##NAME(args...); }          \
     };
 
 #define CPU_BLAS_DECL1(NAME)                        \
@@ -81,11 +81,24 @@ CPU_BLAS_DECL2(axpy)
 
 inline float * cblas_ptr(float *in) { return in; }
 inline double * cblas_ptr(double *in) { return in; }
+
+#if defined(IS_OPENBLAS)
+inline float * cblas_ptr(magmaFloatComplex *in) { return (float *)in; }
+inline double * cblas_ptr(magmaDoubleComplex *in) { return (double *)in; }
+#else
 inline void * cblas_ptr(magmaFloatComplex *in) { return (void *)in; }
 inline void * cblas_ptr(magmaDoubleComplex *in) { return (void *)in; }
+#endif
 
 inline float cblas_scalar(float *in) { return *in; }
 inline double cblas_scalar(double *in) { return *in; }
+
+#if defined(IS_OPENBLAS)
+inline float *cblas_scalar(magmaFloatComplex *in) { return (float *)in; }
+inline double *cblas_scalar(magmaDoubleComplex *in) { return (double *)in; }
+#else
 inline void *cblas_scalar(magmaFloatComplex *in) { return (void *)in; }
 inline void *cblas_scalar(magmaDoubleComplex *in) { return (void *)in; }
 #endif
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list