[arrayfire] 34/75: Force offload OSX LAPACK on unified memory devices

Mon Feb 29 08:01:13 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit 3c06fa081f781d5d590fc6f9f00a76ea09b1d80a
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Tue Feb 16 14:59:18 2016 -0500

    Force offload OSX LAPACK on unified memory devices
---
 src/backend/opencl/blas.cpp     |  2 +-
 src/backend/opencl/platform.cpp | 17 ++++++++++++++---
 src/backend/opencl/platform.hpp |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/backend/opencl/blas.cpp b/src/backend/opencl/blas.cpp
index 365e6e5..7753115 100644
--- a/src/backend/opencl/blas.cpp
+++ b/src/backend/opencl/blas.cpp
@@ -121,7 +121,7 @@ Array<T> matmul(const Array<T> &lhs, const Array<T> &rhs,
                 af_mat_prop optLhs, af_mat_prop optRhs)
 {
 #if defined(WITH_OPENCL_LINEAR_ALGEBRA)
-    if(OpenCLCPUOffload()) {
+    if(OpenCLCPUOffload(false)) {   // Do not force offload gemm on OSX Intel devices
         return cpu::matmul(lhs, rhs, optLhs, optRhs);
     }
 #endif
diff --git a/src/backend/opencl/platform.cpp b/src/backend/opencl/platform.cpp
index 6855e79..c2c13c7 100644
--- a/src/backend/opencl/platform.cpp
+++ b/src/backend/opencl/platform.cpp
@@ -514,11 +514,22 @@ bool isHostUnifiedMemory(const cl::Device &device)
     return device.getInfo<CL_DEVICE_HOST_UNIFIED_MEMORY>();
 }
 
-bool OpenCLCPUOffload()
+bool OpenCLCPUOffload(bool forceOffloadOSX)
 {
-    static const bool sync = getEnvVar("AF_OPENCL_CPU_OFFLOAD") == "1";
+    static const bool offloadEnv = getEnvVar("AF_OPENCL_CPU_OFFLOAD") == "1";
     bool offload = false;
-    if(sync) offload = isHostUnifiedMemory(getDevice());
+    if(offloadEnv) offload = isHostUnifiedMemory(getDevice());
+#if OS_MAC
+    // FORCED OFFLOAD FOR LAPACK FUNCTIONS ON OSX UNIFIED MEMORY DEVICES
+    //
+    // On OSX Unified Memory devices (Intel), always offload LAPACK but not GEMM
+    // irrespective of the AF_OPENCL_CPU_OFFLOAD value
+    // From GEMM, OpenCLCPUOffload(false) is called which will render the
+    // variable inconsequential to the returned result.
+    //
+    // Issue https://github.com/arrayfire/arrayfire/issues/662
+    offload = offload || forceOffloadOSX;
+#endif
     return offload;
 }
 
diff --git a/src/backend/opencl/platform.hpp b/src/backend/opencl/platform.hpp
index 4c745e0..095fdf9 100644
--- a/src/backend/opencl/platform.hpp
+++ b/src/backend/opencl/platform.hpp
@@ -114,7 +114,7 @@ cl_device_type getDeviceType();
 
 bool isHostUnifiedMemory(const cl::Device &device);
 
-bool OpenCLCPUOffload();
+bool OpenCLCPUOffload(bool forceOffloadOSX = true);
 
 bool isGLSharingSupported();
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git