[pyfr] 07/88: Merge branch 'develop' into feature/dts.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Nov 16 12:05:24 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository pyfr.

commit e241a7d50c7591e1d54e66aac9cecff2fa14a338
Merge: 22176ce 4f0494a
Author: Freddie Witherden <freddie at witherden.org>
Date:   Wed Apr 13 18:09:25 2016 -0700

    Merge branch 'develop' into feature/dts.
    
    This commit also adds support for the newly introduced MIC
    backend.

 doc/src/conf.py                                    |   4 +-
 doc/src/developer_guide.rst                        |  33 +-
 doc/src/user_guide.rst                             |  92 ++++-
 pyfr/_version.py                                   |   2 +-
 pyfr/backends/__init__.py                          |   1 +
 pyfr/backends/base/backend.py                      |   1 -
 pyfr/backends/mic/__init__.py                      |   3 +
 pyfr/backends/mic/base.py                          |  78 ++++
 pyfr/backends/mic/blasext.py                       |  78 ++++
 pyfr/backends/mic/cblas.py                         |  44 +++
 pyfr/backends/mic/compiler.py                      |  54 +++
 pyfr/backends/mic/generator.py                     | 217 ++++++++++
 .../mic/kernels/__init__.py}                       |   2 -
 pyfr/backends/mic/kernels/axnpby.mako              |  42 ++
 pyfr/backends/mic/kernels/base.mako                |  20 +
 pyfr/backends/mic/kernels/errest.mako              |  26 ++
 pyfr/backends/mic/kernels/gemm.mako                |  14 +
 pyfr/backends/mic/kernels/loop-sched.mako          |  69 ++++
 pyfr/backends/mic/kernels/pack.mako                |  32 ++
 pyfr/backends/mic/packing.py                       |  45 +++
 pyfr/backends/mic/provider.py                      |  29 ++
 pyfr/backends/mic/types.py                         | 141 +++++++
 pyfr/ctypesutil.py                                 |   2 +-
 pyfr/integrators/base.py                           |   7 +
 pyfr/plugins/dtstats.py                            |   6 +-
 pyfr/plugins/fluidforce.py                         |  10 +-
 pyfr/plugins/sampler.py                            |  63 ++-
 pyfr/readers/__init__.py                           |   3 +-
 pyfr/readers/base.py                               | 219 +++++++++++
 pyfr/readers/cgns.py                               | 435 +++++++++++++++++++++
 pyfr/readers/gmsh.py                               | 204 +---------
 pyfr/readers/native.py                             |   1 -
 pyfr/readers/nodemaps.py                           |  69 ++++
 pyfr/shapes.py                                     |   2 +-
 pyfr/solvers/base/inters.py                        |   6 -
 pyfr/solvers/baseadvec/inters.py                   |  12 +-
 pyfr/solvers/euler/inters.py                       |   4 +-
 pyfr/solvers/navstokes/elements.py                 |   4 +-
 pyfr/solvers/navstokes/inters.py                   |  10 +-
 pyfr/solvers/navstokes/kernels/intconu.mako        |   2 +-
 pyfr/solvers/navstokes/kernels/mpicflux.mako       |   2 +-
 pyfr/writers/base.py                               |   2 -
 setup.py                                           |   6 +-
 43 files changed, 1835 insertions(+), 261 deletions(-)

diff --cc pyfr/backends/mic/blasext.py
index 0000000,cb740fb..f00a284
mode 000000,100644..100644
--- a/pyfr/backends/mic/blasext.py
+++ b/pyfr/backends/mic/blasext.py
@@@ -1,0 -1,77 +1,78 @@@
+ # -*- coding: utf-8 -*-
+ 
+ import numpy as np
+ 
+ from pyfr.backends.mic.provider import MICKernelProvider
+ from pyfr.backends.base import ComputeKernel
+ 
+ 
+ class MICBlasExtKernels(MICKernelProvider):
 -    def axnpby(self, *arr):
++    def axnpby(self, *arr, subdims=None):
+         if any(arr[0].traits != x.traits for x in arr[1:]):
+             raise ValueError('Incompatible matrix types')
+ 
+         nv = len(arr)
 -        nrow, leaddim, leadsubdim, dtype = arr[0].traits
++        ncola, ncolb = arr[0].datashape[1:]
++        nrow, ldim, lsdim, dtype = arr[0].traits
+ 
+         # Render the kernel template
 -        src = self.backend.lookup.get_template('axnpby').render(nv=nv)
++        src = self.backend.lookup.get_template('axnpby').render(
++            subdims=subdims or range(ncola), nv=nv
++        )
+ 
+         # Build the kernel
+         kern = self._build_kernel('axnpby', src,
 -                                  [np.int32] + [np.intp]*nv + [dtype]*nv)
 -
 -        # Determine the total element count in the matrices
 -        cnt = leaddim*nrow
++                                  [np.int32]*4 + [np.intp]*nv + [dtype]*nv)
+ 
+         class AxnpbyKernel(ComputeKernel):
+             def run(self, queue, *consts):
+                 args = [x.data for x in arr] + list(consts)
 -                queue.mic_stream_comp.invoke(kern, cnt, *args)
++                queue.mic_stream_comp.invoke(kern, nrow, ncolb, ldim, lsdim,
++                                             *args)
+ 
+         return AxnpbyKernel()
+ 
+     def copy(self, dst, src):
+         if dst.traits != src.traits:
+             raise ValueError('Incompatible matrix types')
+ 
+         class CopyKernel(ComputeKernel):
+             def run(self, queue):
+                 queue.mic_stream_comp.transfer_device2device(
+                     src.basedata, dst.basedata, dst.nbytes, src.offset,
+                     dst.offset
+                 )
+ 
+         return CopyKernel()
+ 
 -    def errest(self, x, y, z):
++    def errest(self, x, y, z, *, norm):
+         if x.traits != y.traits != z.traits:
+             raise ValueError('Incompatible matrix types')
+ 
+         cnt = x.leaddim*x.nrow
+         dtype = x.dtype
+ 
+         # Allocate space for the return value
+         reth = np.zeros(1)
+         retd = self.backend.sdflt.bind(reth, update_device=False)
+ 
+         # Render the reduction kernel template
 -        src = self.backend.lookup.get_template('errest').render()
++        src = self.backend.lookup.get_template('errest').render(norm=norm)
+ 
+         # Build
+         rkern = self._build_kernel(
+             'errest', src, [np.int32] + [np.intp]*4 + [dtype]*2, restype=dtype
+         )
+ 
+         class ErrestKernel(ComputeKernel):
+             @property
+             def retval(self):
+                 return float(reth[0])
+ 
+             def run(self, queue, atol, rtol):
+                 queue.mic_stream_comp.invoke(
+                     rkern, cnt, retd, x.data, y.data, z.data, atol, rtol
+                 )
+                 retd.update_host()
+ 
+         return ErrestKernel()
diff --cc pyfr/backends/mic/kernels/axnpby.mako
index 0000000,a21160e..ddbea4b
mode 000000,100644..100644
--- a/pyfr/backends/mic/kernels/axnpby.mako
+++ b/pyfr/backends/mic/kernels/axnpby.mako
@@@ -1,0 -1,37 +1,42 @@@
+ # -*- coding: utf-8 -*-
+ <%inherit file='base'/>
+ <%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
+ 
 -static PYFR_NOINLINE void
 -axnpby_inner(int n,
 -             ${', '.join('fpdtype_t *__restrict__ x{0}, '
 -                         'fpdtype_t a{0}'.format(i) for i in range(nv))})
++void
++axnpby(long *nrowp, long *ncolbp, long *ldimp, long *lsdimp,
++       ${', '.join('fpdtype_t **xp' + str(i) for i in range(nv))},
++       ${', '.join('double *ap' + str(i) for i in range(nv))})
+ {
 -    for (int i = 0; i < n; i++)
 -    {
 -        fpdtype_t axn = ${pyfr.dot('a{j}', 'x{j}[i]', j=(1, nv))};
++    int ldim = *ldimp;
++    int lsdim = *lsdimp;
+ 
 -        if (a0 == 0.0)
 -            x0[i] = axn;
 -        else if (a0 == 1.0)
 -            x0[i] += axn;
 -        else
 -            x0[i] = a0*x0[i] + axn;
 -    }
 -}
++% for i in range(nv):
++    fpdtype_t *x${i} = *xp${i};
++    fpdtype_t  a${i} = *ap${i};
++% endfor
+ 
 -void
 -axnpby(long *n,
 -       ${', '.join('fpdtype_t **x{0}'.format(i) for i in range(nv))},
 -       ${', '.join('double *a{0}'.format(i) for i in range(nv))})
 -{
+     #pragma omp parallel
+     {
 -        int begin, end;
 -        loop_sched_1d(*n, PYFR_ALIGN_BYTES / sizeof(fpdtype_t), &begin, &end);
++        int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
++        int rb, re, cb, ce;
++        loop_sched_2d(*nrowp, *ncolbp, align, &rb, &re, &cb, &ce);
++
++        for (int r = rb; r < re; r++)
++        {
++        % for k in subdims:
++            for (int i = cb; i < ce; i++)
++            {
++                int idx = i + ldim*r + ${k}*lsdim;
++                fpdtype_t axn = ${pyfr.dot('a{l}', 'x{l}[idx]', l=(1, nv))};
+ 
 -        axnpby_inner(end - begin,
 -                     ${', '.join('*x{0} + begin, *a{0}'.format(i)
 -                                 for i in range(nv))});
++                if (a0 == 0.0)
++                    x0[idx] = axn;
++                else if (a0 == 1.0)
++                    x0[idx] += axn;
++                else
++                    x0[idx] = a0*x0[idx] + axn;
++            }
++        % endfor
++        }
+     }
+ }
diff --cc pyfr/backends/mic/kernels/errest.mako
index 0000000,78b17a2..0303588
mode 000000,100644..100644
--- a/pyfr/backends/mic/kernels/errest.mako
+++ b/pyfr/backends/mic/kernels/errest.mako
@@@ -1,0 -1,20 +1,26 @@@
+ # -*- coding: utf-8 -*-
+ <%inherit file='base'/>
+ <%namespace module='pyfr.backends.base.makoutil' name='pyfr'/>
+ 
+ void
+ errest(long *n, double *out,
+        fpdtype_t **xp, fpdtype_t **yp, fpdtype_t **zp,
+        double *atolp, double *rtolp)
+ {
+     fpdtype_t *x = *xp, *y = *yp, *z = *zp;
+     fpdtype_t atol = *atolp, rtol = *rtolp;
+ 
 -    fpdtype_t sum = 0.0;
++    fpdtype_t err = 0.0;
+ 
 -    #pragma omp parallel for reduction(+:sum)
++% if norm == 'l2':
++    #pragma omp parallel for reduction(+:err)
+     for (int i = 0; i < *n; i++)
 -        sum += pow(x[i]/(atol + rtol*max(fabs(y[i]), fabs(z[i]))), 2);
++        err += pow(x[i]/(atol + rtol*max(fabs(y[i]), fabs(z[i]))), 2);
++% else:
++    #pragma omp parallel for reduction(max:err)
++    for (int i = 0; i < *n; i++)
++        err = max(err, pow(x[i]/(atol + rtol*max(fabs(y[i]), fabs(z[i]))), 2));
++% endif
+ 
 -    *out = sum;
++    *out = err;
+ }
diff --cc pyfr/integrators/base.py
index f1d699d,4074bfc..b275288
--- a/pyfr/integrators/base.py
+++ b/pyfr/integrators/base.py
@@@ -2,7 -2,6 +2,8 @@@
  
  from abc import ABCMeta, abstractmethod, abstractproperty
  from collections import deque
 +import re
++import time
  
  import numpy as np
  
@@@ -64,59 -46,15 +65,62 @@@ class BaseIntegrator(object, metaclass=
          # Get a queue for subclasses to use
          self._queue = backend.queue()
  
 -        # Get the number of degrees of freedom in this partition
 -        ndofs = sum(self.system.ele_ndofs)
 +        # Global degree of freedom count
 +        self._gndofs = self._get_gndofs()
 +
 +        # Bank index of solution
 +        self._idxcurr = 0
 +
 +        # Solution cache
 +        self._curr_soln = None
 +
 +        # Add kernel cache
 +        self._axnpby_kerns = {}
 +
++        # Record the starting wall clock time
++        self._wstart = time.time()
+ 
 +        # Event handlers for advance_to
 +        self.completed_step_handlers = proxylist(self._get_plugins())
 +
 +        # Delete the memory-intensive elements map from the system
 +        del self.system.ele_map
 +
 +    def _get_reg_banks(self, nreg):
 +        regs, regidx = [], list(range(nreg))
 +
 +        # Create a proxylist of matrix-banks for each storage register
 +        for i in regidx:
 +            regs.append(
 +                proxylist([self.backend.matrix_bank(em, i)
 +                           for em in self.system.ele_banks])
 +            )
 +
 +        return regs, regidx
 +
 +    def _get_gndofs(self):
          comm, rank, root = get_comm_rank_root()
  
 +        # Get the number of degrees of freedom in this partition
 +        ndofs = sum(self.system.ele_ndofs)
 +
          # Sum to get the global number over all partitions
 -        self._gndofs = comm.allreduce(ndofs, op=get_mpi('sum'))
 +        return comm.allreduce(ndofs, op=get_mpi('sum'))
  
 -    def _kernel(self, name, nargs):
 +    def _get_plugins(self):
 +        plugins = []
 +
 +        for s in self.cfg.sections():
 +            m = re.match('soln-plugin-(.+?)(?:-(.+))?$', s)
 +            if m:
 +                cfgsect, name, suffix = m.group(0), m.group(1), m.group(2)
 +
 +                # Instantiate
 +                plugins.append(get_plugin(name, self, cfgsect, suffix))
 +
 +        return plugins
 +
 +    def _get_kernels(self, name, nargs, **kwargs):
          # Transpose from [nregs][neletypes] to [neletypes][nregs]
          transregs = zip(*self._regs)
  
@@@ -193,4 -117,4 +197,7 @@@
              self.advance_to(t)
  
      def collect_stats(self, stats):
++        wtime = time.time() - self._wstart
++
          stats.set('solver-time-integrator', 'tcurr', self.tcurr)
++        stats.set('solver-time-integrator', 'wall-time', wtime)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/pyfr.git



More information about the debian-science-commits mailing list