[pyfr] 36/88: Update the GiMMiK kernel providers to support GiMMiK v2.0.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:27 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit 40bfd58fbc1b6e1efae544a70bbd0c3879c42281
Author: Freddie Witherden <freddie at witherden.org>
Date:   Fri May 20 10:41:34 2016 -0700

    Update the GiMMiK kernel providers to support GiMMiK v2.0.
---
 doc/src/user_guide.rst                       | 11 +++++----
 pyfr/backends/cuda/gimmik.py                 | 21 +++++------------
 pyfr/backends/opencl/gimmik.py               | 24 +++++++------------
 pyfr/backends/openmp/base.py                 |  5 ++--
 pyfr/backends/{opencl => openmp}/gimmik.py   | 35 +++++++++++-----------------
 pyfr/backends/openmp/kernels/par-gimmik.mako | 17 ++++++++++++++
 setup.py                                     |  1 +
 7 files changed, 55 insertions(+), 59 deletions(-)

diff --git a/doc/src/user_guide.rst b/doc/src/user_guide.rst
index 07c3d53..7be6872 100644
--- a/doc/src/user_guide.rst
+++ b/doc/src/user_guide.rst
@@ -21,11 +21,12 @@ Overview
 PyFR |release| has a hard dependency on Python 3.3+ and the following
 Python packages:
 
-1. `h5py <http://www.h5py.org/>`_ >= 2.6
-2. `mako <http://www.makotemplates.org/>`_ >= 1.0.0
-3. `mpi4py <http://mpi4py.scipy.org/>`_ >= 2.0
-4. `numpy <http://www.numpy.org/>`_ >= 1.8
-5. `pytools <https://pypi.python.org/pypi/pytools>`_ >= 2014.3
+1. `gimmik <https://github.com/vincentlab/GiMMiK>`_ >= 2.0
+2. `h5py <http://www.h5py.org/>`_ >= 2.6
+3. `mako <http://www.makotemplates.org/>`_ >= 1.0.0
+4. `mpi4py <http://mpi4py.scipy.org/>`_ >= 2.0
+5. `numpy <http://www.numpy.org/>`_ >= 1.8
+6. `pytools <https://pypi.python.org/pypi/pytools>`_ >= 2014.3
 
 Note that due to a bug in `numpy <http://www.numpy.org/>`_ PyFR is not
 compatible with 32-bit Python distributions.
diff --git a/pyfr/backends/cuda/gimmik.py b/pyfr/backends/cuda/gimmik.py
index 1566d26..46a2559 100644
--- a/pyfr/backends/cuda/gimmik.py
+++ b/pyfr/backends/cuda/gimmik.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from gimmik import generate_mm
 import numpy as np
 
 from pyfr.backends.base import ComputeKernel, NotSuitableError
@@ -14,15 +15,7 @@ class CUDAGiMMiKKernels(CUDAKernelProvider):
         self.max_nnz = backend.cfg.getint('backend-cuda', 'gimmik-max-nnz',
                                           512)
 
-        try:
-            from gimmik.generator import generateKernel
-
-            self._gen_gimmik = generateKernel
-            self.mul = self._mul_gimmik
-        except ImportError:
-            pass
-
-    def _mul_gimmik(self, a, b, out, alpha=1.0, beta=0.0):
+    def mul(self, a, b, out, alpha=1.0, beta=0.0):
         # Ensure the matrices are compatible
         if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
             raise ValueError('Incompatible matrices for out = a*b')
@@ -36,13 +29,11 @@ class CUDAGiMMiKKernels(CUDAKernelProvider):
             raise NotSuitableError('Matrix too dense for GiMMiK')
 
         # Generate
-        src = self._gen_gimmik(
-            a.get(), 'cuda', alpha=alpha, beta=beta,
-            double=a.dtype == np.float64, reduced=True,
-        )
+        src = generate_mm(a.get(), dtype=a.dtype, platform='cuda',
+                          alpha=alpha, beta=beta)
 
         # Build
-        fun = self._build_kernel('gimmik_mm', src, 'PPiii')
+        fun = self._build_kernel('gimmik_mm', src, 'iPiPi')
 
         # Determine the grid/block
         block = (128, 1, 1)
@@ -51,7 +42,7 @@ class CUDAGiMMiKKernels(CUDAKernelProvider):
         class MulKernel(ComputeKernel):
             def run(self, queue):
                 fun.prepared_async_call(grid, block, queue.cuda_stream_comp,
-                                        b, out, b.ncol, b.leaddim,
+                                        b.ncol, b, b.leaddim, out,
                                         out.leaddim)
 
         return MulKernel()
diff --git a/pyfr/backends/opencl/gimmik.py b/pyfr/backends/opencl/gimmik.py
index 0a3c475..234bb83 100644
--- a/pyfr/backends/opencl/gimmik.py
+++ b/pyfr/backends/opencl/gimmik.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from gimmik import generate_mm
 import numpy as np
 
 from pyfr.backends.base import ComputeKernel, NotSuitableError
@@ -13,15 +14,7 @@ class OpenCLGiMMiKKernels(OpenCLKernelProvider):
         self.max_nnz = backend.cfg.getint('backend-opencl', 'gimmik-max-nnz',
                                           512)
 
-        try:
-            from gimmik.generator import generateKernel
-
-            self._gen_gimmik = generateKernel
-            self.mul = self._mul_gimmik
-        except ImportError:
-            pass
-
-    def _mul_gimmik(self, a, b, out, alpha=1.0, beta=0.0):
+    def mul(self, a, b, out, alpha=1.0, beta=0.0):
         # Ensure the matrices are compatible
         if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
             raise ValueError('Incompatible matrices for out = a*b')
@@ -35,17 +28,16 @@ class OpenCLGiMMiKKernels(OpenCLKernelProvider):
             raise NotSuitableError('Matrix too dense for GiMMiK')
 
         # Generate
-        src = self._gen_gimmik(
-            a.get(), 'opencl', alpha=alpha, beta=beta,
-            double=a.dtype == np.float64, reduced=True,
-        )
+        src = generate_mm(a.get(), dtype=a.dtype, platform='opencl',
+                          alpha=alpha, beta=beta)
 
         # Build
-        fun = self._build_kernel('gimmik_mm', src, [np.intp]*2 + [np.int32]*3)
+        fun = self._build_kernel('gimmik_mm', src,
+                                 [np.int32] + [np.intp, np.int32]*2)
 
         class MulKernel(ComputeKernel):
             def run(self, queue):
-                fun(queue.cl_queue_comp, (b.ncol,), None, b.data, out.data,
-                    b.ncol, b.leaddim, out.leaddim)
+                fun(queue.cl_queue_comp, (b.ncol,), None, b.ncol,
+                    b.data, b.leaddim, out.data, out.leaddim)
 
         return MulKernel()
diff --git a/pyfr/backends/openmp/base.py b/pyfr/backends/openmp/base.py
index 945f6c2..7830f6d 100644
--- a/pyfr/backends/openmp/base.py
+++ b/pyfr/backends/openmp/base.py
@@ -15,8 +15,8 @@ class OpenMPBackend(BaseBackend):
         # Take the alignment requirement to be 32-bytes
         self.alignb = 32
 
-        from pyfr.backends.openmp import (blasext, cblas, packing, provider,
-                                          types)
+        from pyfr.backends.openmp import (blasext, cblas, gimmik, packing,
+                                          provider, types)
 
         # Register our data types
         self.base_matrix_cls = types.OpenMPMatrixBase
@@ -39,6 +39,7 @@ class OpenMPBackend(BaseBackend):
         kprovcls = [provider.OpenMPPointwiseKernelProvider,
                     blasext.OpenMPBlasExtKernels,
                     packing.OpenMPPackingKernels,
+                    gimmik.OpenMPGiMMiKKernels,
                     cblas.OpenMPCBLASKernels]
         self._providers = [k(self) for k in kprovcls]
 
diff --git a/pyfr/backends/opencl/gimmik.py b/pyfr/backends/openmp/gimmik.py
similarity index 50%
copy from pyfr/backends/opencl/gimmik.py
copy to pyfr/backends/openmp/gimmik.py
index 0a3c475..edaf89c 100644
--- a/pyfr/backends/opencl/gimmik.py
+++ b/pyfr/backends/openmp/gimmik.py
@@ -1,27 +1,20 @@
 # -*- coding: utf-8 -*-
 
+from gimmik import generate_mm
 import numpy as np
 
 from pyfr.backends.base import ComputeKernel, NotSuitableError
-from pyfr.backends.opencl.provider import OpenCLKernelProvider
+from pyfr.backends.openmp.provider import OpenMPKernelProvider
 
 
-class OpenCLGiMMiKKernels(OpenCLKernelProvider):
+class OpenMPGiMMiKKernels(OpenMPKernelProvider):
     def __init__(self, backend):
         super().__init__(backend)
 
-        self.max_nnz = backend.cfg.getint('backend-opencl', 'gimmik-max-nnz',
+        self.max_nnz = backend.cfg.getint('backend-openmp', 'gimmik-max-nnz',
                                           512)
 
-        try:
-            from gimmik.generator import generateKernel
-
-            self._gen_gimmik = generateKernel
-            self.mul = self._mul_gimmik
-        except ImportError:
-            pass
-
-    def _mul_gimmik(self, a, b, out, alpha=1.0, beta=0.0):
+    def mul(self, a, b, out, alpha=1.0, beta=0.0):
         # Ensure the matrices are compatible
         if a.nrow != out.nrow or a.ncol != b.nrow or b.ncol != out.ncol:
             raise ValueError('Incompatible matrices for out = a*b')
@@ -34,18 +27,18 @@ class OpenCLGiMMiKKernels(OpenCLKernelProvider):
         if np.count_nonzero(a.get()) > self.max_nnz:
             raise NotSuitableError('Matrix too dense for GiMMiK')
 
-        # Generate
-        src = self._gen_gimmik(
-            a.get(), 'opencl', alpha=alpha, beta=beta,
-            double=a.dtype == np.float64, reduced=True,
-        )
+        # Generate the GiMMiK kernel
+        gimmik_mm = generate_mm(a.get(), dtype=a.dtype, platform='c',
+                                alpha=alpha, beta=beta)
 
-        # Build
-        fun = self._build_kernel('gimmik_mm', src, [np.intp]*2 + [np.int32]*3)
+        # Generate and build the OpenMP-wrapped GiMMiK kernel
+        tpl = self.backend.lookup.get_template('par-gimmik')
+        src = tpl.render(gimmik_mm=gimmik_mm)
+        par_gimmik_mm = self._build_kernel('par_gimmik_mm', src,
+                                           [np.int32] + [np.intp, np.int32]*2)
 
         class MulKernel(ComputeKernel):
             def run(self, queue):
-                fun(queue.cl_queue_comp, (b.ncol,), None, b.data, out.data,
-                    b.ncol, b.leaddim, out.leaddim)
+                par_gimmik_mm(b.ncol, b, b.leaddim, out, out.leaddim)
 
         return MulKernel()
diff --git a/pyfr/backends/openmp/kernels/par-gimmik.mako b/pyfr/backends/openmp/kernels/par-gimmik.mako
new file mode 100644
index 0000000..150893b
--- /dev/null
+++ b/pyfr/backends/openmp/kernels/par-gimmik.mako
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+<%inherit file='base'/>
+
+// GiMMiK kernel
+${gimmik_mm}
+
+void
+par_gimmik_mm(int N, const fpdtype_t *B, int ldb, fpdtype_t *C, int ldc)
+{
+    #pragma omp parallel
+    {
+        int begin, end;
+        loop_sched_1d(N, PYFR_ALIGN_BYTES / sizeof(fpdtype_t), &begin, &end);
+
+        gimmik_mm(end - begin, B + begin, ldb, C + begin, ldc);
+    }
+}
diff --git a/setup.py b/setup.py
index b24b7f3..1e9f2e0 100755
--- a/setup.py
+++ b/setup.py
@@ -93,6 +93,7 @@ data_files = [
 
 # Hard dependencies
 install_requires = [
+    'gimmik >= 2.0',
     'h5py >= 2.6',
     'mako >= 1.0.0',
     'mpi4py >= 2.0',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git



More information about the debian-science-commits mailing list