[python-dtcwt] 03/17: OpenCL fixes for nVidia cards
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Mar 9 11:28:57 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository python-dtcwt.
commit 394094c5db978df8110b55e295d77715a75bd4f4
Author: Rich Wareham <rjw57 at cam.ac.uk>
Date: Tue Mar 1 13:57:06 2016 +0000
OpenCL fixes for nVidia cards
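The OpenCL backend fails on the latest nVidia drivers because the code was not
explicit about the types involved. This commit passes array shapes to
cl_array.empty() as tuples and casts the sqrt(0.5) scale factors in the q2c
kernel to float.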
---
dtcwt/opencl/lowlevel.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/dtcwt/opencl/lowlevel.py b/dtcwt/opencl/lowlevel.py
index bc60b82..e5d869a 100644
--- a/dtcwt/opencl/lowlevel.py
+++ b/dtcwt/opencl/lowlevel.py
@@ -19,7 +19,7 @@ import struct
from dtcwt.utils import asfarray, as_column_vector, memoize
def empty(shape, dtype, queue=None):
- return cl_array.empty(to_queue(queue), shape, dtype)
+ return cl_array.empty(to_queue(queue), tuple(shape), dtype)
def colfilter(X, h):
"""Filter the columns of image *X* using filter vector *h*, without decimation.
@@ -253,7 +253,7 @@ def axis_convolve(X, h, axis=0, queue=None, output=None):
output_shape = list(X.shape)
if h.shape[0] % 2 == 0:
output_shape[axis] += 1
- output = cl_array.empty(queue, output_shape, np.float32)
+ output = cl_array.empty(queue, tuple(output_shape), np.float32)
return _apply_kernel(X, h, kern, output, axis=axis)
@@ -266,7 +266,7 @@ def axis_convolve_dfilter(X, h, axis=0, queue=None, output=None):
if output is None:
output_shape = list(X.shape)
output_shape[axis] >>= 1
- output = cl_array.empty(queue, output_shape, np.float32)
+ output = cl_array.empty(queue, tuple(output_shape), np.float32)
return _apply_kernel(X, h, kern, output, axis=axis, elementstep=2)
@@ -279,7 +279,7 @@ def axis_convolve_ifilter(X, h, axis=0, queue=None, output=None):
if output is None:
output_shape = list(X.shape)
output_shape[axis] <<= 1
- output = cl_array.empty(queue, output_shape, np.float32)
+ output = cl_array.empty(queue, tuple(output_shape), np.float32)
return _apply_kernel(X, h, kern, output, axis=axis, elementstep=0.5)
@@ -298,7 +298,7 @@ def q2c(X1, X2, X3, queue=None, output=None):
output_shape[0] >>= 1
output_shape[1] >>= 1
output_shape[2] = 6
- output = cl_array.empty(queue, output_shape, np.complex64)
+ output = cl_array.empty(queue, tuple(output_shape), np.complex64)
# If necessary, convert X
X1_device = to_device(X1, queue)
@@ -619,7 +619,7 @@ void __kernel q2c_kernel(
X1[coord_to_offset(X_coord + (int4)(1,0,0,0), X1_spec)], // c
X1[coord_to_offset(X_coord + (int4)(1,1,0,0), X1_spec)], // d
};
- X1_samples *= sqrt(0.5);
+ X1_samples *= (float)sqrt(0.5);
float4 X2_samples = {
X2[coord_to_offset(X_coord, X2_spec)], // a
@@ -627,7 +627,7 @@ void __kernel q2c_kernel(
X2[coord_to_offset(X_coord + (int4)(1,0,0,0), X2_spec)], // c
X2[coord_to_offset(X_coord + (int4)(1,1,0,0), X2_spec)], // d
};
- X2_samples *= sqrt(0.5);
+ X2_samples *= (float)sqrt(0.5);
float4 X3_samples = {
X3[coord_to_offset(X_coord, X3_spec)], // a
@@ -635,7 +635,7 @@ void __kernel q2c_kernel(
X3[coord_to_offset(X_coord + (int4)(1,0,0,0), X3_spec)], // c
X3[coord_to_offset(X_coord + (int4)(1,1,0,0), X3_spec)], // d
};
- X3_samples *= sqrt(0.5);
+ X3_samples *= (float)sqrt(0.5);
float2 z1a = { X1_samples.x - X1_samples.w, X1_samples.y + X1_samples.z };
float2 z1b = { X1_samples.x + X1_samples.w, X1_samples.y - X1_samples.z };
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/python-dtcwt.git
More information about the debian-science-commits mailing list