[pyfr] 47/88: Remove the extra one byte of allocation in the OpenCL backend.

Wed Nov 16 12:05:28 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit 18413e63cdfa33312924fd67d0736bea8a39945c
Author: Freddie Witherden <freddie at witherden.org>
Date:   Sun Jun 5 16:20:14 2016 -0700

    Remove the extra one byte of allocation in the OpenCL backend.
    
    The original purpose of this was to avoid a crash in the NVIDIA
    drivers upon exit.  However, the fix no longer seems to work with
    the current NVIDIA drivers and so serves little purpose.
---
 pyfr/backends/opencl/base.py  | 10 +++-------
 pyfr/backends/opencl/types.py |  4 ++--
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/pyfr/backends/opencl/base.py b/pyfr/backends/opencl/base.py
index 7499baa..aadecf9 100644
--- a/pyfr/backends/opencl/base.py
+++ b/pyfr/backends/opencl/base.py
@@ -88,14 +88,10 @@ class OpenCLBackend(BaseBackend):
     def _malloc_impl(self, nbytes):
         import pyopencl as cl
 
-        # Allocate the device buffer; note here that we over allocate
-        # by a byte.  This is needed to work around some issues in
-        # related to the construction of sub buffers.  (For which the
-        # solution is to increase the size of the region by one byte;
-        # hence requiring an extra byte of allocation.)
-        buf = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, nbytes + 1)
+        # Allocate the device buffer
+        buf = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE, nbytes)
 
         # Zero the buffer
-        cl.enqueue_copy(self.qdflt, buf, np.zeros(nbytes + 1, dtype=np.uint8))
+        cl.enqueue_copy(self.qdflt, buf, np.zeros(nbytes, dtype=np.uint8))
 
         return buf
diff --git a/pyfr/backends/opencl/types.py b/pyfr/backends/opencl/types.py
index 556736f..31abf18 100644
--- a/pyfr/backends/opencl/types.py
+++ b/pyfr/backends/opencl/types.py
@@ -10,7 +10,7 @@ from pyfr.util import lazyprop
 class OpenCLMatrixBase(base.MatrixBase):
     def onalloc(self, basedata, offset):
         self.basedata = basedata
-        self.data = basedata.get_sub_region(offset, self.nbytes + 1)
+        self.data = basedata.get_sub_region(offset, self.nbytes)
         self.offset = offset
 
         # Process any initial value
@@ -51,7 +51,7 @@ class OpenCLMatrixRSlice(base.MatrixRSlice):
     @lazyprop
     def data(self):
         return self.parent.basedata.get_sub_region(self.offset,
-                                                   self.nrow*self.pitch + 1)
+                                                   self.nrow*self.pitch)
 
     @property
     def _as_parameter_(self):

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git