[arrayfire] 74/84: Changes to opencl backend when building with openblas
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Mon Jan 4 23:22:28 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository arrayfire.
commit 613557c090af8d4a0f84b06ad4b63ff77d2843a5
Author: Pavan Yalamanchili <pavan at arrayfire.com>
Date: Tue Dec 29 20:37:32 2015 -0500
Changes to opencl backend when building with openblas
---
src/backend/opencl/magma/labrd.cpp | 119 ++++++++++++++++--------------
src/backend/opencl/magma/magma_cpu_blas.h | 31 +++++---
2 files changed, 85 insertions(+), 65 deletions(-)
diff --git a/src/backend/opencl/magma/labrd.cpp b/src/backend/opencl/magma/labrd.cpp
index 0284ee8..115b48d 100644
--- a/src/backend/opencl/magma/labrd.cpp
+++ b/src/backend/opencl/magma/labrd.cpp
@@ -64,6 +64,12 @@
#include <algorithm>
+#define cpu_blas_gemv_macro(_trans, _m, _n, _alpha, _aptr, _lda, _xptr, _incx, _beta, _yptr, _incy) \
+ cpu_blas_gemv(_trans, _m, _n, \
+ cblas_scalar(_alpha), cblas_ptr(_aptr), _lda, \
+ cblas_ptr(_xptr), _incx, \
+ cblas_scalar(_beta), cblas_ptr(_yptr), _incy)
+
template<typename Ty> magma_int_t
magma_labrd_gpu(
magma_int_t m, magma_int_t n, magma_int_t nb,
@@ -264,15 +270,15 @@ magma_labrd_gpu(
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
}
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ + a_dim1], lda,
- &y[i__+y_dim1], ldy, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], c__1);
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &a[i__ + a_dim1], lda,
+ &y[i__+y_dim1], ldy, (&c_one), &a[i__ + i__ * a_dim1], c__1);
if (is_cplx) {
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__+y_dim1], ldy));
}
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &x[i__ + x_dim1], ldx,
- &a[i__*a_dim1+1], c__1, cblas_scalar(&c_one), &a[i__+i__*a_dim1], c__1);
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &x[i__ + x_dim1], ldx,
+ &a[i__*a_dim1+1], c__1, (&c_one), &a[i__+i__*a_dim1], c__1);
/* Generate reflection Q(i) to annihilate A(i+1:m,i) */
alpha = a[i__ + i__ * a_dim1];
@@ -310,19 +316,19 @@ magma_labrd_gpu(
queue, &event);
i__2 = m - i__ + 1;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &a[i__ + a_dim1],
- lda, &a[i__ + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &a[i__ + a_dim1],
+ lda, &a[i__ + i__ * a_dim1], c__1, (&c_zero),
&y[i__ * y_dim1 + 1], c__1);
i__2 = n - i__;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &y[i__ + 1 +y_dim1], ldy,
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &y[i__ + 1 +y_dim1], ldy,
&y[i__ * y_dim1 + 1], c__1,
- cblas_scalar(&c_zero), f, c__1);
+ (&c_zero), f, c__1);
i__2 = m - i__ + 1;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &x[i__ + x_dim1],
- ldx, &a[i__ + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &x[i__ + x_dim1],
+ ldx, &a[i__ + i__ * a_dim1], c__1, (&c_zero),
&y[i__ * y_dim1 + 1], c__1);
// 4. Synch to make sure the result is back ----------------
@@ -330,16 +336,17 @@ magma_labrd_gpu(
if (i__3 != 0){
i__2 = n - i__;
- cpu_blas_axpy(i__2, cblas_scalar(&c_one), f,c__1, &y[i__+1+i__*y_dim1],c__1);
+ cpu_blas_axpy(i__2, cblas_scalar(&c_one),
+ cblas_ptr(f),c__1, cblas_ptr(&y[i__+1+i__*y_dim1]), c__1);
}
i__2 = i__ - 1;
i__3 = n - i__;
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one),
- &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], c__1, cblas_scalar(&c_one),
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_neg_one),
+ &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], c__1, (&c_one),
&y[i__ + 1 + i__ * y_dim1], c__1);
i__2 = n - i__;
- cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), &y[i__ + 1 + i__ * y_dim1], c__1);
+ cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), cblas_ptr(&y[i__ + 1 + i__ * y_dim1]), c__1);
/* Update A(i,i+1:n) */
i__2 = n - i__;
@@ -348,9 +355,9 @@ magma_labrd_gpu(
LAPACKE_CHECK(cpu_lapack_lacgv(i__, &a[i__+a_dim1], lda));
}
- cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__, (&c_neg_one),
&y[i__ + 1 + y_dim1], ldy, &a[i__ + a_dim1], lda,
- cblas_scalar(&c_one), &a[i__ + (i__ + 1) * a_dim1], lda);
+ (&c_one), &a[i__ + (i__ + 1) * a_dim1], lda);
i__2 = i__ - 1;
i__3 = n - i__;
@@ -359,8 +366,8 @@ magma_labrd_gpu(
LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
}
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[(i__ + 1) *
- a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_neg_one), &a[(i__ + 1) *
+ a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, (&c_one), &a[
i__ + (i__ + 1) * a_dim1], lda);
if (is_cplx) {
LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__+x_dim1], ldx));
@@ -402,35 +409,35 @@ magma_labrd_gpu(
queue, &event);
i__2 = n - i__;
- cpu_blas_gemv(CblasTransParam, i__2, i__, cblas_scalar(&c_one), &y[i__ + 1 + y_dim1],
- ldy, &a[i__ + (i__ + 1) * a_dim1], lda, cblas_scalar(&c_zero), &x[
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__, (&c_one), &y[i__ + 1 + y_dim1],
+ ldy, &a[i__ + (i__ + 1) * a_dim1], lda, (&c_zero), &x[
i__ * x_dim1 + 1], c__1);
i__2 = m - i__;
- cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one), &a[i__ + 1 + a_dim1], lda,
- &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_zero), f, c__1);
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__, (&c_neg_one), &a[i__ + 1 + a_dim1], lda,
+ &x[i__ * x_dim1 + 1], c__1, (&c_zero), f, c__1);
i__2 = i__ - 1;
i__3 = n - i__;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_one), &a[(i__ + 1) * a_dim1 + 1],
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_one), &a[(i__ + 1) * a_dim1 + 1],
lda, &a[i__ + (i__ + 1) * a_dim1], lda,
- cblas_scalar(&c_zero), &x[i__ * x_dim1 + 1], c__1);
+ (&c_zero), &x[i__ * x_dim1 + 1], c__1);
// 4. Synch to make sure the result is back ----------------
magma_event_sync(event);
if (i__!=0){
i__2 = m - i__;
- cpu_blas_axpy(i__2, cblas_scalar(&c_one), f,c__1, &x[i__+1+i__*x_dim1],c__1);
+ cpu_blas_axpy(i__2, cblas_scalar(&c_one), cblas_ptr(f),c__1, cblas_ptr(&x[i__+1+i__*x_dim1]),c__1);
}
i__2 = m - i__;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &x[i__ + 1 +
- x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_one), &x[
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &x[i__ + 1 +
+ x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, (&c_one), &x[
i__ + 1 + i__ * x_dim1], c__1);
i__2 = m - i__;
- cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), &x[i__ + 1 + i__ * x_dim1], c__1);
+ cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), cblas_ptr(&x[i__ + 1 + i__ * x_dim1]), c__1);
if (is_cplx) {
i__2 = n - i__;
@@ -455,16 +462,16 @@ magma_labrd_gpu(
LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &a[i__ + i__ * a_dim1], lda));
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
}
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one), &y[i__ + y_dim1], ldy,
- &a[i__ + a_dim1], lda, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one), &y[i__ + y_dim1], ldy,
+ &a[i__ + a_dim1], lda, (&c_one), &a[i__ + i__ * a_dim1], lda);
i__2 = i__ - 1;
if (is_cplx) {
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &a[i__ + a_dim1], lda));
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &x[i__ + x_dim1], ldx));
}
i__3 = n - i__ + 1;
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_neg_one), &a[i__ * a_dim1 + 1],
- lda, &x[i__ + x_dim1], ldx, cblas_scalar(&c_one), &a[i__ + i__ * a_dim1], lda);
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_neg_one), &a[i__ * a_dim1 + 1],
+ lda, &x[i__ + x_dim1], ldx, (&c_one), &a[i__ + i__ * a_dim1], lda);
if (is_cplx) {
LAPACKE_CHECK(cpu_lapack_lacgv(i__2, &x[i__ + x_dim1], ldx));
}
@@ -510,35 +517,35 @@ magma_labrd_gpu(
i__2 = n - i__ + 1;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &y[i__ + y_dim1],
- ldy, &a[i__ + i__ * a_dim1], lda, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &y[i__ + y_dim1],
+ ldy, &a[i__ + i__ * a_dim1], lda, (&c_zero),
&x[i__ * x_dim1 + 1], c__1);
i__2 = m - i__;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
- &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
+ &a[i__ + 1 + a_dim1], lda, &x[i__ * x_dim1 + 1], c__1, (&c_zero),
f, c__1);
i__2 = i__ - 1;
i__3 = n - i__ + 1;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_one),
- &a[i__ * a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_one),
+ &a[i__ * a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, (&c_zero),
&x[i__ * x_dim1 + 1], c__1);
// 4. Synch to make sure the result is back ----------------
magma_event_sync(event);
if (i__2 != 0){
i__3 = m - i__;
- cpu_blas_axpy(i__3, cblas_scalar(&c_one), f,c__1, &x[i__+1+i__*x_dim1],c__1);
+ cpu_blas_axpy(i__3, cblas_scalar(&c_one), cblas_ptr(f),c__1, cblas_ptr(&x[i__+1+i__*x_dim1]),c__1);
}
i__2 = m - i__;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
- &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, cblas_scalar(&c_one),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
+ &x[i__ + 1 + x_dim1], ldx, &x[i__ * x_dim1 + 1], c__1, (&c_one),
&x[i__ + 1 + i__ * x_dim1], c__1);
i__2 = m - i__;
- cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), &x[i__ + 1 + i__ * x_dim1], c__1);
+ cpu_blas_scal(i__2, cblas_scalar(&taup[i__]), cblas_ptr(&x[i__ + 1 + i__ * x_dim1]), c__1);
i__2 = n - i__ + 1;
if (is_cplx) {
@@ -557,15 +564,15 @@ magma_labrd_gpu(
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
}
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
- &a[i__ + 1 + a_dim1], lda, &y[i__ + y_dim1], ldy, cblas_scalar(&c_one),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
+ &a[i__ + 1 + a_dim1], lda, &y[i__ + y_dim1], ldy, (&c_one),
&a[i__ + 1 + i__ * a_dim1], c__1);
i__2 = m - i__;
if (is_cplx) {
LAPACKE_CHECK(cpu_lapack_lacgv(i__3, &y[i__ + y_dim1], ldy));
}
- cpu_blas_gemv(CblasNoTrans, i__2, i__, cblas_scalar(&c_neg_one),
- &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], c__1, cblas_scalar(&c_one),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__, (&c_neg_one),
+ &x[i__ + 1 + x_dim1], ldx, &a[i__ * a_dim1 + 1], c__1, (&c_one),
&a[i__ + 1 + i__ * a_dim1], c__1);
/* Generate reflection Q(i) to annihilate A(i+2:m,i) */
@@ -602,33 +609,33 @@ magma_labrd_gpu(
i__2 = m - i__;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasTransParam, i__2, i__3, cblas_scalar(&c_one), &a[i__ + 1 + a_dim1],
- lda, &a[i__ + 1 + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__3, (&c_one), &a[i__ + 1 + a_dim1],
+ lda, &a[i__ + 1 + i__ * a_dim1], c__1, (&c_zero),
&y[ i__ * y_dim1 + 1], c__1);
i__2 = n - i__;
i__3 = i__ - 1;
- cpu_blas_gemv(CblasNoTrans, i__2, i__3, cblas_scalar(&c_neg_one),
+ cpu_blas_gemv_macro(CblasNoTrans, i__2, i__3, (&c_neg_one),
&y[i__ + 1 + y_dim1], ldy, &y[i__ * y_dim1 + 1], c__1,
- cblas_scalar(&c_zero), f, c__1);
+ (&c_zero), f, c__1);
i__2 = m - i__;
- cpu_blas_gemv(CblasTransParam, i__2, i__, cblas_scalar(&c_one), &x[i__ + 1 + x_dim1],
- ldx, &a[i__ + 1 + i__ * a_dim1], c__1, cblas_scalar(&c_zero),
+ cpu_blas_gemv_macro(CblasTransParam, i__2, i__, (&c_one), &x[i__ + 1 + x_dim1],
+ ldx, &a[i__ + 1 + i__ * a_dim1], c__1, (&c_zero),
&y[i__ * y_dim1 + 1], c__1);
// 4. Synch to make sure the result is back ----------------
magma_event_sync(event);
if (i__3 != 0){
i__2 = n - i__;
- cpu_blas_axpy(i__2, cblas_scalar(&c_one), f,c__1, &y[i__+1+i__*y_dim1],c__1);
+ cpu_blas_axpy(i__2, cblas_scalar(&c_one), cblas_ptr(f),c__1, cblas_ptr(&y[i__+1+i__*y_dim1]),c__1);
}
i__2 = n - i__;
- cpu_blas_gemv(CblasTransParam, i__, i__2, cblas_scalar(&c_neg_one),
+ cpu_blas_gemv_macro(CblasTransParam, i__, i__2, (&c_neg_one),
&a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1],
- c__1, cblas_scalar(&c_one), &y[i__ + 1 + i__ * y_dim1], c__1);
+ c__1, (&c_one), &y[i__ + 1 + i__ * y_dim1], c__1);
i__2 = n - i__;
- cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), &y[i__ + 1 + i__ * y_dim1], c__1);
+ cpu_blas_scal(i__2, cblas_scalar(&tauq[i__]), cblas_ptr(&y[i__ + 1 + i__ * y_dim1]), c__1);
}
else {
if (is_cplx) {
diff --git a/src/backend/opencl/magma/magma_cpu_blas.h b/src/backend/opencl/magma/magma_cpu_blas.h
index 6ae4f8f..b3cba09 100644
--- a/src/backend/opencl/magma/magma_cpu_blas.h
+++ b/src/backend/opencl/magma/magma_cpu_blas.h
@@ -41,14 +41,14 @@ typedef int blasint;
template<typename T> \
struct cpu_blas_##NAME##_func;
-#define CPU_BLAS_FUNC1(NAME, TYPE, X) \
- template<> \
- struct cpu_blas_##NAME##_func<TYPE> \
- { \
- template<typename... Args> \
- void \
- operator() (Args... args) \
- { return cblas_##X##NAME(CblasColMajor, args...); } \
+#define CPU_BLAS_FUNC1(NAME, TYPE, X) \
+ template<> \
+ struct cpu_blas_##NAME##_func<TYPE> \
+ { \
+ template<typename... Args> \
+ void \
+ operator() (Args... args) \
+ { cblas_##X##NAME(CblasColMajor, args...); } \
};
#define CPU_BLAS_FUNC2(NAME, TYPE, X) \
@@ -58,7 +58,7 @@ typedef int blasint;
template<typename... Args> \
void \
operator() (Args... args) \
- { return cblas_##X##NAME(args...); } \
+ { cblas_##X##NAME(args...); } \
};
#define CPU_BLAS_DECL1(NAME) \
@@ -81,11 +81,24 @@ CPU_BLAS_DECL2(axpy)
inline float * cblas_ptr(float *in) { return in; }
inline double * cblas_ptr(double *in) { return in; }
+
+#if defined(IS_OPENBLAS)
+inline float * cblas_ptr(magmaFloatComplex *in) { return (float *)in; }
+inline double * cblas_ptr(magmaDoubleComplex *in) { return (double *)in; }
+#else
inline void * cblas_ptr(magmaFloatComplex *in) { return (void *)in; }
inline void * cblas_ptr(magmaDoubleComplex *in) { return (void *)in; }
+#endif
inline float cblas_scalar(float *in) { return *in; }
inline double cblas_scalar(double *in) { return *in; }
+
+#if defined(IS_OPENBLAS)
+inline float *cblas_scalar(magmaFloatComplex *in) { return (float *)in; }
+inline double *cblas_scalar(magmaDoubleComplex *in) { return (double *)in; }
+#else
inline void *cblas_scalar(magmaFloatComplex *in) { return (void *)in; }
inline void *cblas_scalar(magmaDoubleComplex *in) { return (void *)in; }
#endif
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git
More information about the debian-science-commits
mailing list